diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index ef3fba37817daa..c12217d549479b 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2577,6 +2577,7 @@ struct CFISnapshot { case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); @@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot { case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); @@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState, case MCCFIInstruction::OpAdjustCfaOffset: case MCCFIInstruction::OpWindowSave: case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: llvm_unreachable("unsupported CFI opcode"); diff --git a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp index 2a0cc403b726e8..3132067f3d5ece 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseDesignatedInitializersCheck.cpp @@ -80,9 +80,13 @@ unsigned getNumberOfDesignated(const InitListExpr *SyntacticInitList) { }); } -AST_MATCHER(CXXRecordDecl, isAggregate) { return Node.isAggregate(); } +AST_MATCHER(CXXRecordDecl, isAggregate) { + return Node.hasDefinition() && Node.isAggregate(); +} -AST_MATCHER(CXXRecordDecl, isPOD) { return Node.isPOD(); } +AST_MATCHER(CXXRecordDecl, isPOD) { + return Node.hasDefinition() && Node.isPOD(); +} AST_MATCHER(InitListExpr, isFullyDesignated) { if (const InitListExpr *SyntacticForm = diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 54118e5f92f417..ccebf74e8a67e7 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -216,6 +216,10 @@ Changes in existing checks a false positive when only an implicit conversion happened inside an initializer list. +- Improved :doc:`modernize-use-designated-initializers + ` check to fix a + crash when a class is declared but not defined. + - Improved :doc:`modernize-use-nullptr ` check to also recognize ``NULL``/``__null`` (but not ``0``) when used with a templated type. diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp index 9b769ad0be23ca..048665b2e54ac5 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-designated-initializers.cpp @@ -201,3 +201,11 @@ DECLARE_S93; // CHECK-MESSAGES-MACROS: :[[@LINE-1]]:1: warning: use designated initializer list to initialize 'S9' [modernize-use-designated-initializers] // CHECK-MESSAGES-MACROS: :[[@LINE-4]]:28: note: expanded from macro 'DECLARE_S93' // CHECK-MESSAGES-MACROS: :[[@LINE-71]]:1: note: aggregate type is defined here + +// Issue #113652. +struct S14; + +struct S15{ + S15(S14& d):d{d}{} + S14& d; +}; diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst deleted file mode 100644 index 2475a5d4b2775b..00000000000000 --- a/clang/docs/ClangFormattedStatus.rst +++ /dev/null @@ -1,8536 +0,0 @@ -.. raw:: html - - - -.. role:: none -.. role:: part -.. role:: good -.. role:: total - -====================== -Clang Formatted Status -====================== - -:doc:`ClangFormattedStatus` describes the state of LLVM source -tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:26 (`830ba4cebe79 `_). - - -.. list-table:: LLVM Clang-Format Status - :widths: 50 25 25 25 25 - :header-rows: 1 - - * - Directory - - Total Files - - Formatted Files - - Unformatted Files - - % Complete - * - bolt/include/bolt/Core - - `15` - - `10` - - `5` - - :part:`66%` - * - bolt/include/bolt/Passes - - `47` - - `47` - - `0` - - :good:`100%` - * - bolt/include/bolt/Profile - - `8` - - `8` - - `0` - - :good:`100%` - * - bolt/include/bolt/Rewrite - - `5` - - `4` - - `1` - - :part:`80%` - * - bolt/include/bolt/RuntimeLibs - - `3` - - `3` - - `0` - - :good:`100%` - * - bolt/include/bolt/Utils - - `4` - - `4` - - `0` - - :good:`100%` - * - bolt/lib/Core - - `14` - - `5` - - `9` - - :part:`35%` - * - bolt/lib/Passes - - `45` - - `21` - - `24` - - :part:`46%` - * - bolt/lib/Profile - - `7` - - `3` - - `4` - - :part:`42%` - * - bolt/lib/Rewrite - - `6` - - `0` - - `6` - - :none:`0%` - * - bolt/lib/RuntimeLibs - - `3` - - `3` - - `0` - - :good:`100%` - * - bolt/lib/Target/AArch64 - - `1` - - `0` - - `1` - - :none:`0%` - * - bolt/lib/Target/X86 - - `1` - - `0` - - `1` - - :none:`0%` - * - bolt/lib/Utils - - `2` - - `1` - - `1` - - :part:`50%` - * - bolt/runtime - - `3` - - `0` - - `3` - - :none:`0%` - * - bolt/tools/driver - - `1` - - `0` - - `1` - - :none:`0%` - * - bolt/tools/heatmap - - `1` - - `1` - - `0` - - :good:`100%` - * - bolt/tools/llvm-bolt-fuzzer - - `1` - - `1` - - `0` - - :good:`100%` - * - bolt/tools/merge-fdata - - `1` - - `0` - - `1` - - :none:`0%` - * - bolt/unittests/Core - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/bindings/python/tests/cindex/INPUTS - - `5` - - `3` - - `2` - - :part:`60%` - * - clang/docs/analyzer/checkers - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/examples/AnnotateFunctions - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/examples/Attribute - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/examples/CallSuperAttribute - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/examples/PluginsOrder - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/examples/PrintFunctionNames - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/include/clang/Analysis - - `16` - - `4` - - `12` - - :part:`25%` - * - clang/include/clang/Analysis/Analyses - - `15` - - `3` - - `12` - - :part:`20%` - * - clang/include/clang/Analysis/DomainSpecific - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/include/clang/Analysis/FlowSensitive - - `16` - - `15` - - `1` - - :part:`93%` - * - clang/include/clang/Analysis/Support - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/include/clang/APINotes - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/include/clang/ARCMigrate - - `3` - - `0` - - `3` - - :none:`0%` - * - clang/include/clang/AST - - `114` - - `20` - - `94` - - :part:`17%` - * - clang/include/clang/ASTMatchers - - `5` - - `1` - - `4` - - :part:`20%` - * - clang/include/clang/ASTMatchers/Dynamic - - `4` - - `1` - - `3` - - :part:`25%` - * - clang/include/clang/Basic - - `82` - - `32` - - `50` - - :part:`39%` - * - clang/include/clang/CodeGen - - `9` - - `0` - - `9` - - :none:`0%` - * - clang/include/clang/CrossTU - - `2` - - `1` - - `1` - - :part:`50%` - * - clang/include/clang/DirectoryWatcher - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/include/clang/Driver - - `17` - - `4` - - `13` - - :part:`23%` - * - clang/include/clang/Edit - - `5` - - `1` - - `4` - - :part:`20%` - * - clang/include/clang/Format - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/include/clang/Frontend - - `28` - - `7` - - `21` - - :part:`25%` - * - clang/include/clang/FrontendTool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/include/clang/Index - - `7` - - `2` - - `5` - - :part:`28%` - * - clang/include/clang/IndexSerialization - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/include/clang/Interpreter - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/include/clang/Lex - - `29` - - `6` - - `23` - - :part:`20%` - * - clang/include/clang/Parse - - `5` - - `2` - - `3` - - :part:`40%` - * - clang/include/clang/Rewrite/Core - - `6` - - `0` - - `6` - - :none:`0%` - * - clang/include/clang/Rewrite/Frontend - - `4` - - `0` - - `4` - - :none:`0%` - * - clang/include/clang/Sema - - `32` - - `3` - - `29` - - :part:`9%` - * - clang/include/clang/Serialization - - `14` - - `3` - - `11` - - :part:`21%` - * - clang/include/clang/StaticAnalyzer/Checkers - - `4` - - `1` - - `3` - - :part:`25%` - * - clang/include/clang/StaticAnalyzer/Core - - `5` - - `1` - - `4` - - :part:`20%` - * - clang/include/clang/StaticAnalyzer/Core/BugReporter - - `4` - - `1` - - `3` - - :part:`25%` - * - clang/include/clang/StaticAnalyzer/Core/PathSensitive - - `37` - - `10` - - `27` - - :part:`27%` - * - clang/include/clang/StaticAnalyzer/Frontend - - `5` - - `2` - - `3` - - :part:`40%` - * - clang/include/clang/Testing - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling - - `17` - - `10` - - `7` - - :part:`58%` - * - clang/include/clang/Tooling/ASTDiff - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Core - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/include/clang/Tooling/DependencyScanning - - `5` - - `5` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Inclusions - - `3` - - `3` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Refactoring - - `15` - - `12` - - `3` - - :part:`80%` - * - clang/include/clang/Tooling/Refactoring/Extract - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Refactoring/Rename - - `6` - - `5` - - `1` - - :part:`83%` - * - clang/include/clang/Tooling/Syntax - - `5` - - `5` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Syntax/Pseudo - - `5` - - `5` - - `0` - - :good:`100%` - * - clang/include/clang/Tooling/Transformer - - `8` - - `6` - - `2` - - :part:`75%` - * - clang/include/clang-c - - `10` - - `3` - - `7` - - :part:`30%` - * - clang/INPUTS - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/lib/Analysis - - `28` - - `3` - - `25` - - :part:`10%` - * - clang/lib/Analysis/FlowSensitive - - `7` - - `7` - - `0` - - :good:`100%` - * - clang/lib/Analysis/plugins/CheckerDependencyHandling - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/lib/Analysis/plugins/CheckerOptionHandling - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/Analysis/plugins/SampleAnalyzer - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/lib/APINotes - - `3` - - `3` - - `0` - - :good:`100%` - * - clang/lib/ARCMigrate - - `22` - - `0` - - `22` - - :none:`0%` - * - clang/lib/AST - - `81` - - `2` - - `79` - - :part:`2%` - * - clang/lib/AST/ByteCode - - `44` - - `18` - - `26` - - :part:`40%` - * - clang/lib/ASTMatchers - - `3` - - `1` - - `2` - - :part:`33%` - * - clang/lib/ASTMatchers/Dynamic - - `6` - - `1` - - `5` - - :part:`16%` - * - clang/lib/Basic - - `39` - - `13` - - `26` - - :part:`33%` - * - clang/lib/Basic/Targets - - `50` - - `25` - - `25` - - :part:`50%` - * - clang/lib/CodeGen - - `87` - - `9` - - `78` - - :part:`10%` - * - clang/lib/CrossTU - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/DirectoryWatcher - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/lib/DirectoryWatcher/default - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/DirectoryWatcher/linux - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/DirectoryWatcher/mac - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/DirectoryWatcher/windows - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/Driver - - `14` - - `2` - - `12` - - :part:`14%` - * - clang/lib/Driver/ToolChains - - `94` - - `41` - - `53` - - :part:`43%` - * - clang/lib/Driver/ToolChains/Arch - - `20` - - `7` - - `13` - - :part:`35%` - * - clang/lib/Edit - - `3` - - `0` - - `3` - - :none:`0%` - * - clang/lib/Format - - `35` - - `35` - - `0` - - :good:`100%` - * - clang/lib/Frontend - - `32` - - `4` - - `28` - - :part:`12%` - * - clang/lib/Frontend/Rewrite - - `8` - - `0` - - `8` - - :none:`0%` - * - clang/lib/FrontendTool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/Headers - - `146` - - `14` - - `132` - - :part:`9%` - * - clang/lib/Headers/openmp_wrappers - - `5` - - `4` - - `1` - - :part:`80%` - * - clang/lib/Headers/ppc_wrappers - - `7` - - `2` - - `5` - - :part:`28%` - * - clang/lib/Index - - `11` - - `2` - - `9` - - :part:`18%` - * - clang/lib/IndexSerialization - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/lib/Interpreter - - `5` - - `5` - - `0` - - :good:`100%` - * - clang/lib/Lex - - `24` - - `1` - - `23` - - :part:`4%` - * - clang/lib/Parse - - `15` - - `1` - - `14` - - :part:`6%` - * - clang/lib/Rewrite - - `5` - - `0` - - `5` - - :none:`0%` - * - clang/lib/Sema - - `55` - - `4` - - `51` - - :part:`7%` - * - clang/lib/Serialization - - `17` - - `2` - - `15` - - :part:`11%` - * - clang/lib/StaticAnalyzer/Checkers - - `122` - - `19` - - `103` - - :part:`15%` - * - clang/lib/StaticAnalyzer/Checkers/cert - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/lib/StaticAnalyzer/Checkers/MPI-Checker - - `6` - - `0` - - `6` - - :none:`0%` - * - clang/lib/StaticAnalyzer/Checkers/RetainCountChecker - - `4` - - `0` - - `4` - - :none:`0%` - * - clang/lib/StaticAnalyzer/Checkers/UninitializedObject - - `3` - - `1` - - `2` - - :part:`33%` - * - clang/lib/StaticAnalyzer/Checkers/WebKit - - `10` - - `8` - - `2` - - :part:`80%` - * - clang/lib/StaticAnalyzer/Core - - `47` - - `10` - - `37` - - :part:`21%` - * - clang/lib/StaticAnalyzer/Frontend - - `8` - - `3` - - `5` - - :part:`37%` - * - clang/lib/Testing - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/lib/Tooling - - `16` - - `7` - - `9` - - :part:`43%` - * - clang/lib/Tooling/ASTDiff - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/lib/Tooling/Core - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/lib/Tooling/DependencyScanning - - `5` - - `4` - - `1` - - :part:`80%` - * - clang/lib/Tooling/DumpTool - - `4` - - `3` - - `1` - - :part:`75%` - * - clang/lib/Tooling/Inclusions - - `3` - - `3` - - `0` - - :good:`100%` - * - clang/lib/Tooling/Refactoring - - `5` - - `3` - - `2` - - :part:`60%` - * - clang/lib/Tooling/Refactoring/Extract - - `2` - - `1` - - `1` - - :part:`50%` - * - clang/lib/Tooling/Refactoring/Rename - - `5` - - `2` - - `3` - - :part:`40%` - * - clang/lib/Tooling/Syntax - - `7` - - `6` - - `1` - - :part:`85%` - * - clang/lib/Tooling/Syntax/Pseudo - - `8` - - `8` - - `0` - - :good:`100%` - * - clang/lib/Tooling/Transformer - - `7` - - `4` - - `3` - - :part:`57%` - * - clang/tools/amdgpu-arch - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/apinotes-test - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/arcmt-test - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/c-index-test - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-check - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-diff - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-extdef-mapping - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-format - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/clang-format/fuzzer - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-fuzzer - - `6` - - `4` - - `2` - - :part:`66%` - * - clang/tools/clang-fuzzer/fuzzer-initialize - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/tools/clang-fuzzer/handle-cxx - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/tools/clang-fuzzer/handle-llvm - - `3` - - `1` - - `2` - - :part:`33%` - * - clang/tools/clang-fuzzer/proto-to-cxx - - `5` - - `0` - - `5` - - :none:`0%` - * - clang/tools/clang-fuzzer/proto-to-llvm - - `3` - - `0` - - `3` - - :none:`0%` - * - clang/tools/clang-import-test - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-linker-wrapper - - `3` - - `2` - - `1` - - :part:`66%` - * - clang/tools/clang-nvlink-wrapper - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/clang-offload-bundler - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/tools/clang-offload-wrapper - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/clang-refactor - - `4` - - `4` - - `0` - - :good:`100%` - * - clang/tools/clang-repl - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/clang-scan-deps - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/clang-shlib - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/tools/diagtool - - `9` - - `0` - - `9` - - :none:`0%` - * - clang/tools/driver - - `4` - - `1` - - `3` - - :part:`25%` - * - clang/tools/libclang - - `35` - - `5` - - `30` - - :part:`14%` - * - clang/tools/scan-build-py/tests/functional/src/include - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/unittests/Analysis - - `6` - - `2` - - `4` - - :part:`33%` - * - clang/unittests/Analysis/FlowSensitive - - `14` - - `13` - - `1` - - :part:`92%` - * - clang/unittests/AST - - `30` - - `8` - - `22` - - :part:`26%` - * - clang/unittests/ASTMatchers - - `6` - - `3` - - `3` - - :part:`50%` - * - clang/unittests/ASTMatchers/Dynamic - - `3` - - `0` - - `3` - - :none:`0%` - * - clang/unittests/Basic - - `8` - - `4` - - `4` - - :part:`50%` - * - clang/unittests/CodeGen - - `6` - - `1` - - `5` - - :part:`16%` - * - clang/unittests/CrossTU - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/unittests/DirectoryWatcher - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/unittests/Driver - - `5` - - `1` - - `4` - - :part:`20%` - * - clang/unittests/Format - - `24` - - `24` - - `0` - - :good:`100%` - * - clang/unittests/Frontend - - `11` - - `7` - - `4` - - :part:`63%` - * - clang/unittests/Index - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/unittests/Interpreter - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/unittests/Interpreter/ExceptionTests - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/unittests/Introspection - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/unittests/Lex - - `8` - - `4` - - `4` - - :part:`50%` - * - clang/unittests/libclang - - `2` - - `0` - - `2` - - :none:`0%` - * - clang/unittests/libclang/CrashTests - - `1` - - `1` - - `0` - - :good:`100%` - * - clang/unittests/Rename - - `6` - - `0` - - `6` - - :none:`0%` - * - clang/unittests/Rewrite - - `2` - - `1` - - `1` - - :part:`50%` - * - clang/unittests/Sema - - `3` - - `2` - - `1` - - :part:`66%` - * - clang/unittests/Serialization - - `2` - - `2` - - `0` - - :good:`100%` - * - clang/unittests/StaticAnalyzer - - `16` - - `7` - - `9` - - :part:`43%` - * - clang/unittests/Tooling - - `30` - - `10` - - `20` - - :part:`33%` - * - clang/unittests/Tooling/RecursiveASTVisitorTests - - `30` - - `12` - - `18` - - :part:`40%` - * - clang/unittests/Tooling/Syntax - - `7` - - `3` - - `4` - - :part:`42%` - * - clang/unittests/Tooling/Syntax/Pseudo - - `4` - - `4` - - `0` - - :good:`100%` - * - clang/utils/perf-training/cxx - - `1` - - `0` - - `1` - - :none:`0%` - * - clang/utils/TableGen - - `22` - - `3` - - `19` - - :part:`13%` - * - clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-apply-replacements/lib/Tooling - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-apply-replacements/tool - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-change-namespace - - `2` - - `0` - - `2` - - :none:`0%` - * - clang-tools-extra/clang-change-namespace/tool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clang-doc - - `17` - - `16` - - `1` - - :part:`94%` - * - clang-tools-extra/clang-doc/tool - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-include-fixer - - `13` - - `8` - - `5` - - :part:`61%` - * - clang-tools-extra/clang-include-fixer/find-all-symbols - - `17` - - `13` - - `4` - - :part:`76%` - * - clang-tools-extra/clang-include-fixer/find-all-symbols/tool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clang-include-fixer/plugin - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-include-fixer/tool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clang-move - - `4` - - `1` - - `3` - - :part:`25%` - * - clang-tools-extra/clang-move/tool - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-query - - `5` - - `4` - - `1` - - :part:`80%` - * - clang-tools-extra/clang-query/tool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clang-reorder-fields - - `2` - - `1` - - `1` - - :part:`50%` - * - clang-tools-extra/clang-reorder-fields/tool - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clang-tidy - - `20` - - `14` - - `6` - - :part:`70%` - * - clang-tools-extra/clang-tidy/abseil - - `42` - - `31` - - `11` - - :part:`73%` - * - clang-tools-extra/clang-tidy/altera - - `11` - - `9` - - `2` - - :part:`81%` - * - clang-tools-extra/clang-tidy/android - - `33` - - `23` - - `10` - - :part:`69%` - * - clang-tools-extra/clang-tidy/boost - - `3` - - `3` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-tidy/bugprone - - `125` - - `106` - - `19` - - :part:`84%` - * - clang-tools-extra/clang-tidy/cert - - `29` - - `28` - - `1` - - :part:`96%` - * - clang-tools-extra/clang-tidy/concurrency - - `5` - - `4` - - `1` - - :part:`80%` - * - clang-tools-extra/clang-tidy/cppcoreguidelines - - `45` - - `42` - - `3` - - :part:`93%` - * - clang-tools-extra/clang-tidy/darwin - - `5` - - `2` - - `3` - - :part:`40%` - * - clang-tools-extra/clang-tidy/fuchsia - - `15` - - `10` - - `5` - - :part:`66%` - * - clang-tools-extra/clang-tidy/google - - `33` - - `22` - - `11` - - :part:`66%` - * - clang-tools-extra/clang-tidy/hicpp - - `9` - - `7` - - `2` - - :part:`77%` - * - clang-tools-extra/clang-tidy/linuxkernel - - `3` - - `2` - - `1` - - :part:`66%` - * - clang-tools-extra/clang-tidy/llvm - - `11` - - `10` - - `1` - - :part:`90%` - * - clang-tools-extra/clang-tidy/llvmlibc - - `7` - - `7` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-tidy/misc - - `33` - - `30` - - `3` - - :part:`90%` - * - clang-tools-extra/clang-tidy/modernize - - `67` - - `48` - - `19` - - :part:`71%` - * - clang-tools-extra/clang-tidy/mpi - - `5` - - `5` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-tidy/objc - - `17` - - `12` - - `5` - - :part:`70%` - * - clang-tools-extra/clang-tidy/openmp - - `5` - - `5` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-tidy/performance - - `31` - - `24` - - `7` - - :part:`77%` - * - clang-tools-extra/clang-tidy/plugin - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clang-tidy/portability - - `5` - - `3` - - `2` - - :part:`60%` - * - clang-tools-extra/clang-tidy/readability - - `88` - - `76` - - `12` - - :part:`86%` - * - clang-tools-extra/clang-tidy/tool - - `3` - - `2` - - `1` - - :part:`66%` - * - clang-tools-extra/clang-tidy/utils - - `35` - - `31` - - `4` - - :part:`88%` - * - clang-tools-extra/clang-tidy/zircon - - `3` - - `3` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd - - `97` - - `81` - - `16` - - :part:`83%` - * - clang-tools-extra/clangd/benchmarks - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/benchmarks/CompletionModel - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/clangd/fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index - - `39` - - `36` - - `3` - - :part:`92%` - * - clang-tools-extra/clangd/index/dex - - `9` - - `7` - - `2` - - :part:`77%` - * - clang-tools-extra/clangd/index/dex/dexp - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index/remote - - `2` - - `2` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index/remote/marshalling - - `2` - - `2` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index/remote/monitor - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index/remote/server - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/index/remote/unimplemented - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/indexer - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/refactor - - `6` - - `5` - - `1` - - :part:`83%` - * - clang-tools-extra/clangd/refactor/tweaks - - `14` - - `10` - - `4` - - :part:`71%` - * - clang-tools-extra/clangd/support - - `25` - - `24` - - `1` - - :part:`96%` - * - clang-tools-extra/clangd/tool - - `2` - - `2` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/unittests - - `79` - - `66` - - `13` - - :part:`83%` - * - clang-tools-extra/clangd/unittests/decision_forest_model - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/unittests/remote - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/unittests/support - - `11` - - `11` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/unittests/tweaks - - `20` - - `19` - - `1` - - :part:`95%` - * - clang-tools-extra/clangd/unittests/xpc - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/xpc - - `3` - - `3` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/xpc/framework - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/clangd/xpc/test-client - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/modularize - - `9` - - `1` - - `8` - - :part:`11%` - * - clang-tools-extra/pp-trace - - `3` - - `1` - - `2` - - :part:`33%` - * - clang-tools-extra/tool-template - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/unittests/clang-apply-replacements - - `1` - - `1` - - `0` - - :good:`100%` - * - clang-tools-extra/unittests/clang-change-namespace - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/unittests/clang-doc - - `9` - - `9` - - `0` - - :good:`100%` - * - clang-tools-extra/unittests/clang-include-fixer - - `2` - - `0` - - `2` - - :none:`0%` - * - clang-tools-extra/unittests/clang-include-fixer/find-all-symbols - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/unittests/clang-move - - `1` - - `0` - - `1` - - :none:`0%` - * - clang-tools-extra/unittests/clang-query - - `2` - - `0` - - `2` - - :none:`0%` - * - clang-tools-extra/unittests/clang-tidy - - `16` - - `9` - - `7` - - :part:`56%` - * - clang-tools-extra/unittests/include/common - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/include/fuzzer - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/include/sanitizer - - `15` - - `3` - - `12` - - :part:`20%` - * - compiler-rt/include/xray - - `3` - - `2` - - `1` - - :part:`66%` - * - compiler-rt/lib/asan - - `57` - - `5` - - `52` - - :part:`8%` - * - compiler-rt/lib/asan/tests - - `17` - - `1` - - `16` - - :part:`5%` - * - compiler-rt/lib/BlocksRuntime - - `2` - - `0` - - `2` - - :none:`0%` - * - compiler-rt/lib/builtins - - `11` - - `9` - - `2` - - :part:`81%` - * - compiler-rt/lib/builtins/arm - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/lib/builtins/ppc - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/cfi - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/lib/dfsan - - `14` - - `9` - - `5` - - :part:`64%` - * - compiler-rt/lib/fuzzer - - `47` - - `9` - - `38` - - :part:`19%` - * - compiler-rt/lib/fuzzer/afl - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/lib/fuzzer/dataflow - - `3` - - `0` - - `3` - - :none:`0%` - * - compiler-rt/lib/fuzzer/tests - - `2` - - `1` - - `1` - - :part:`50%` - * - compiler-rt/lib/gwp_asan - - `12` - - `12` - - `0` - - :good:`100%` - * - compiler-rt/lib/gwp_asan/optional - - `10` - - `10` - - `0` - - :good:`100%` - * - compiler-rt/lib/gwp_asan/platform_specific - - `13` - - `13` - - `0` - - :good:`100%` - * - compiler-rt/lib/gwp_asan/tests - - `15` - - `14` - - `1` - - :part:`93%` - * - compiler-rt/lib/gwp_asan/tests/platform_specific - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/hwasan - - `30` - - `9` - - `21` - - :part:`30%` - * - compiler-rt/lib/interception - - `8` - - `1` - - `7` - - :part:`12%` - * - compiler-rt/lib/interception/tests - - `3` - - `1` - - `2` - - :part:`33%` - * - compiler-rt/lib/lsan - - `20` - - `4` - - `16` - - :part:`20%` - * - compiler-rt/lib/memprof - - `31` - - `29` - - `2` - - :part:`93%` - * - compiler-rt/lib/memprof/tests - - `2` - - `2` - - `0` - - :good:`100%` - * - compiler-rt/lib/msan - - `18` - - `4` - - `14` - - :part:`22%` - * - compiler-rt/lib/msan/tests - - `4` - - `0` - - `4` - - :none:`0%` - * - compiler-rt/lib/orc - - `21` - - `16` - - `5` - - :part:`76%` - * - compiler-rt/lib/orc/unittests - - `10` - - `9` - - `1` - - :part:`90%` - * - compiler-rt/lib/profile - - `6` - - `0` - - `6` - - :none:`0%` - * - compiler-rt/lib/safestack - - `3` - - `1` - - `2` - - :part:`33%` - * - compiler-rt/lib/sanitizer_common - - `167` - - `29` - - `138` - - :part:`17%` - * - compiler-rt/lib/sanitizer_common/symbolizer - - `2` - - `2` - - `0` - - :good:`100%` - * - compiler-rt/lib/sanitizer_common/tests - - `46` - - `12` - - `34` - - :part:`26%` - * - compiler-rt/lib/scudo - - `20` - - `0` - - `20` - - :none:`0%` - * - compiler-rt/lib/scudo/standalone - - `49` - - `48` - - `1` - - :part:`97%` - * - compiler-rt/lib/scudo/standalone/benchmarks - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/scudo/standalone/fuzz - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/scudo/standalone/include/scudo - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/scudo/standalone/tests - - `25` - - `24` - - `1` - - :part:`96%` - * - compiler-rt/lib/scudo/standalone/tools - - `1` - - `1` - - `0` - - :good:`100%` - * - compiler-rt/lib/stats - - `3` - - `0` - - `3` - - :none:`0%` - * - compiler-rt/lib/tsan/benchmarks - - `6` - - `0` - - `6` - - :none:`0%` - * - compiler-rt/lib/tsan/dd - - `3` - - `0` - - `3` - - :none:`0%` - * - compiler-rt/lib/tsan/go - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/lib/tsan/rtl - - `59` - - `14` - - `45` - - :part:`23%` - * - compiler-rt/lib/tsan/rtl-old - - `61` - - `13` - - `48` - - :part:`21%` - * - compiler-rt/lib/tsan/tests/rtl - - `10` - - `0` - - `10` - - :none:`0%` - * - compiler-rt/lib/tsan/tests/unit - - `11` - - `3` - - `8` - - :part:`27%` - * - compiler-rt/lib/ubsan - - `27` - - `7` - - `20` - - :part:`25%` - * - compiler-rt/lib/ubsan_minimal - - `1` - - `0` - - `1` - - :none:`0%` - * - compiler-rt/lib/xray - - `40` - - `27` - - `13` - - :part:`67%` - * - compiler-rt/lib/xray/tests/unit - - `10` - - `8` - - `2` - - :part:`80%` - * - compiler-rt/tools/gwp_asan - - `2` - - `2` - - `0` - - :good:`100%` - * - cross-project-tests/debuginfo-tests/clang_llvm_roundtrip - - `2` - - `1` - - `1` - - :part:`50%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty - - `10` - - `0` - - `10` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect - - `7` - - `0` - - `7` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address - - `7` - - `0` - - `7` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source - - `1` - - `1` - - `0` - - :good:`100%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary - - `1` - - `1` - - `0` - - :good:`100%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source - - `1` - - `1` - - `0` - - :good:`100%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/windows_noncanonical_path/source - - `1` - - `0` - - `1` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test - - `8` - - `0` - - `8` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/expect_step_kind - - `5` - - `0` - - `5` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps - - `8` - - `2` - - `6` - - :part:`25%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools - - `1` - - `0` - - `1` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect - - `2` - - `0` - - `2` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/dexter-tests - - `15` - - `3` - - `12` - - :part:`20%` - * - cross-project-tests/debuginfo-tests/llgdb-tests - - `8` - - `0` - - `8` - - :none:`0%` - * - cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb - - `2` - - `1` - - `1` - - :part:`50%` - * - flang/examples - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/examples/FlangOmpReport - - `3` - - `3` - - `0` - - :good:`100%` - * - flang/examples/PrintFlangFunctionNames - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/include/flang - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/include/flang/Common - - `21` - - `21` - - `0` - - :good:`100%` - * - flang/include/flang/Decimal - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/include/flang/Evaluate - - `23` - - `23` - - `0` - - :good:`100%` - * - flang/include/flang/Frontend - - `11` - - `10` - - `1` - - :part:`90%` - * - flang/include/flang/FrontendTool - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/include/flang/Lower - - `25` - - `24` - - `1` - - :part:`96%` - * - flang/include/flang/Lower/Support - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/Builder - - `7` - - `7` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/Builder/Runtime - - `10` - - `10` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/CodeGen - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/Dialect - - `5` - - `5` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/Support - - `8` - - `8` - - `0` - - :good:`100%` - * - flang/include/flang/Optimizer/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/include/flang/Parser - - `17` - - `16` - - `1` - - :part:`94%` - * - flang/include/flang/Runtime - - `28` - - `27` - - `1` - - :part:`96%` - * - flang/include/flang/Semantics - - `9` - - `8` - - `1` - - :part:`88%` - * - flang/lib/Common - - `4` - - `4` - - `0` - - :good:`100%` - * - flang/lib/Decimal - - `3` - - `3` - - `0` - - :good:`100%` - * - flang/lib/Evaluate - - `33` - - `31` - - `2` - - :part:`93%` - * - flang/lib/Frontend - - `8` - - `6` - - `2` - - :part:`75%` - * - flang/lib/FrontendTool - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/lib/Lower - - `20` - - `20` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/Builder - - `6` - - `6` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/Builder/Runtime - - `9` - - `9` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/CodeGen - - `10` - - `10` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/Dialect - - `5` - - `5` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/Support - - `4` - - `4` - - `0` - - :good:`100%` - * - flang/lib/Optimizer/Transforms - - `10` - - `10` - - `0` - - :good:`100%` - * - flang/lib/Parser - - `35` - - `35` - - `0` - - :good:`100%` - * - flang/lib/Semantics - - `78` - - `69` - - `9` - - :part:`88%` - * - flang/module - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/runtime - - `74` - - `72` - - `2` - - :part:`97%` - * - flang/tools/bbc - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/tools/f18 - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/tools/f18-parse-demo - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/tools/fir-opt - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/tools/flang-driver - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/tools/tco - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/unittests/Common - - `1` - - `1` - - `0` - - :good:`100%` - * - flang/unittests/Decimal - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/unittests/Evaluate - - `15` - - `15` - - `0` - - :good:`100%` - * - flang/unittests/Frontend - - `2` - - `2` - - `0` - - :good:`100%` - * - flang/unittests/Optimizer - - `4` - - `3` - - `1` - - :part:`75%` - * - flang/unittests/Optimizer/Builder - - `4` - - `4` - - `0` - - :good:`100%` - * - flang/unittests/Optimizer/Builder/Runtime - - `10` - - `10` - - `0` - - :good:`100%` - * - flang/unittests/Runtime - - `22` - - `22` - - `0` - - :good:`100%` - * - libc/AOR_v20.02/math - - `4` - - `1` - - `3` - - :part:`25%` - * - libc/AOR_v20.02/math/include - - `1` - - `0` - - `1` - - :none:`0%` - * - libc/AOR_v20.02/networking - - `1` - - `0` - - `1` - - :none:`0%` - * - libc/AOR_v20.02/networking/include - - `1` - - `0` - - `1` - - :none:`0%` - * - libc/AOR_v20.02/string - - `1` - - `0` - - `1` - - :none:`0%` - * - libc/AOR_v20.02/string/include - - `1` - - `0` - - `1` - - :none:`0%` - * - libc/benchmarks - - `15` - - `14` - - `1` - - :part:`93%` - * - libc/benchmarks/automemcpy/include/automemcpy - - `4` - - `4` - - `0` - - :good:`100%` - * - libc/benchmarks/automemcpy/lib - - `5` - - `5` - - `0` - - :good:`100%` - * - libc/benchmarks/automemcpy/unittests - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/config/linux - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/fuzzing/math - - `6` - - `6` - - `0` - - :good:`100%` - * - libc/fuzzing/stdlib - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/fuzzing/string - - `3` - - `2` - - `1` - - :part:`66%` - * - libc/include - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/include/llvm-libc-macros - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/include/llvm-libc-macros/linux - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/include/llvm-libc-types - - `28` - - `28` - - `0` - - :good:`100%` - * - libc/loader/linux/aarch64 - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/loader/linux/x86_64 - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/src/assert - - `3` - - `1` - - `2` - - :part:`33%` - * - libc/src/ctype - - `32` - - `32` - - `0` - - :good:`100%` - * - libc/src/errno - - `4` - - `4` - - `0` - - :good:`100%` - * - libc/src/fcntl - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/fcntl/linux - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/fenv - - `28` - - `28` - - `0` - - :good:`100%` - * - libc/src/inttypes - - `6` - - `6` - - `0` - - :good:`100%` - * - libc/src/math - - `91` - - `91` - - `0` - - :good:`100%` - * - libc/src/math/aarch64 - - `10` - - `10` - - `0` - - :good:`100%` - * - libc/src/math/generic - - `94` - - `94` - - `0` - - :good:`100%` - * - libc/src/math/x86_64 - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/signal - - `8` - - `8` - - `0` - - :good:`100%` - * - libc/src/signal/linux - - `10` - - `10` - - `0` - - :good:`100%` - * - libc/src/stdio - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/stdlib - - `46` - - `46` - - `0` - - :good:`100%` - * - libc/src/stdlib/linux - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/src/string - - `61` - - `61` - - `0` - - :good:`100%` - * - libc/src/string/memory_utils - - `8` - - `7` - - `1` - - :part:`87%` - * - libc/src/sys/mman - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/src/sys/mman/linux - - `2` - - `1` - - `1` - - :part:`50%` - * - libc/src/sys/stat - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/src/sys/stat/linux - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/src/threads - - `16` - - `16` - - `0` - - :good:`100%` - * - libc/src/threads/linux - - `11` - - `7` - - `4` - - :part:`63%` - * - libc/src/time - - `12` - - `12` - - `0` - - :good:`100%` - * - libc/src/unistd - - `7` - - `7` - - `0` - - :good:`100%` - * - libc/src/unistd/linux - - `7` - - `7` - - `0` - - :good:`100%` - * - libc/src/__support - - `10` - - `10` - - `0` - - :good:`100%` - * - libc/src/__support/CPP - - `11` - - `10` - - `1` - - :part:`90%` - * - libc/src/__support/File - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/src/__support/FPUtil - - `15` - - `14` - - `1` - - :part:`93%` - * - libc/src/__support/FPUtil/aarch64 - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/__support/FPUtil/generic - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/__support/FPUtil/x86_64 - - `6` - - `5` - - `1` - - :part:`83%` - * - libc/src/__support/OSUtil - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/src/__support/OSUtil/linux - - `3` - - `2` - - `1` - - :part:`66%` - * - libc/src/__support/OSUtil/linux/aarch64 - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/src/__support/OSUtil/linux/x86_64 - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/src/__support/threads - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/src/__support/threads/linux - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/utils/HdrGen - - `9` - - `9` - - `0` - - :good:`100%` - * - libc/utils/HdrGen/PrototypeTestGen - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/utils/LibcTableGenUtil - - `2` - - `2` - - `0` - - :good:`100%` - * - libc/utils/MPFRWrapper - - `3` - - `3` - - `0` - - :good:`100%` - * - libc/utils/testutils - - `10` - - `9` - - `1` - - :part:`90%` - * - libc/utils/tools/WrapperGen - - `1` - - `1` - - `0` - - :good:`100%` - * - libc/utils/UnitTest - - `12` - - `11` - - `1` - - :part:`91%` - * - libclc/generic/include - - `2` - - `1` - - `1` - - :part:`50%` - * - libclc/generic/include/clc - - `6` - - `2` - - `4` - - :part:`33%` - * - libclc/generic/include/clc/async - - `4` - - `4` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/atomic - - `11` - - `7` - - `4` - - :part:`63%` - * - libclc/generic/include/clc/cl_khr_global_int32_base_atomics - - `6` - - `5` - - `1` - - :part:`83%` - * - libclc/generic/include/clc/cl_khr_global_int32_extended_atomics - - `5` - - `5` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/cl_khr_int64_base_atomics - - `6` - - `3` - - `3` - - :part:`50%` - * - libclc/generic/include/clc/cl_khr_int64_extended_atomics - - `5` - - `5` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/cl_khr_local_int32_base_atomics - - `6` - - `5` - - `1` - - :part:`83%` - * - libclc/generic/include/clc/cl_khr_local_int32_extended_atomics - - `5` - - `5` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/common - - `6` - - `6` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/explicit_fence - - `1` - - `1` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/float - - `1` - - `0` - - `1` - - :none:`0%` - * - libclc/generic/include/clc/geometric - - `8` - - `8` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/image - - `2` - - `0` - - `2` - - :none:`0%` - * - libclc/generic/include/clc/integer - - `16` - - `13` - - `3` - - :part:`81%` - * - libclc/generic/include/clc/math - - `95` - - `92` - - `3` - - :part:`96%` - * - libclc/generic/include/clc/misc - - `2` - - `0` - - `2` - - :none:`0%` - * - libclc/generic/include/clc/relational - - `18` - - `12` - - `6` - - :part:`66%` - * - libclc/generic/include/clc/shared - - `5` - - `3` - - `2` - - :part:`60%` - * - libclc/generic/include/clc/synchronization - - `2` - - `2` - - `0` - - :good:`100%` - * - libclc/generic/include/clc/workitem - - `8` - - `8` - - `0` - - :good:`100%` - * - libclc/generic/include/integer - - `1` - - `1` - - `0` - - :good:`100%` - * - libclc/generic/include/math - - `15` - - `15` - - `0` - - :good:`100%` - * - libclc/generic/lib - - `1` - - `0` - - `1` - - :none:`0%` - * - libclc/generic/lib/math - - `8` - - `1` - - `7` - - :part:`12%` - * - libclc/generic/lib/relational - - `1` - - `0` - - `1` - - :none:`0%` - * - libclc/utils - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/benchmarks - - `28` - - `10` - - `18` - - :part:`35%` - * - libcxx/include - - `22` - - `0` - - `22` - - :none:`0%` - * - libcxx/include/__algorithm - - `102` - - `15` - - `87` - - :part:`14%` - * - libcxx/include/__bit - - `2` - - `0` - - `2` - - :none:`0%` - * - libcxx/include/__charconv - - `3` - - `0` - - `3` - - :none:`0%` - * - libcxx/include/__chrono - - `8` - - `0` - - `8` - - :none:`0%` - * - libcxx/include/__compare - - `13` - - `1` - - `12` - - :part:`7%` - * - libcxx/include/__concepts - - `22` - - `0` - - `22` - - :none:`0%` - * - libcxx/include/__coroutine - - `4` - - `0` - - `4` - - :none:`0%` - * - libcxx/include/__filesystem - - `16` - - `3` - - `13` - - :part:`18%` - * - libcxx/include/__format - - `17` - - `2` - - `15` - - :part:`11%` - * - libcxx/include/__functional - - `27` - - `0` - - `27` - - :none:`0%` - * - libcxx/include/__ios - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/include/__iterator - - `36` - - `0` - - `36` - - :none:`0%` - * - libcxx/include/__memory - - `19` - - `1` - - `18` - - :part:`5%` - * - libcxx/include/__numeric - - `13` - - `4` - - `9` - - :part:`30%` - * - libcxx/include/__random - - `37` - - `2` - - `35` - - :part:`5%` - * - libcxx/include/__ranges - - `29` - - `2` - - `27` - - :part:`6%` - * - libcxx/include/__support/android - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/include/__support/fuchsia - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/include/__support/ibm - - `6` - - `2` - - `4` - - :part:`33%` - * - libcxx/include/__support/musl - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/include/__support/newlib - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/include/__support/openbsd - - `1` - - `1` - - `0` - - :good:`100%` - * - libcxx/include/__support/solaris - - `3` - - `2` - - `1` - - :part:`66%` - * - libcxx/include/__support/win32 - - `2` - - `0` - - `2` - - :none:`0%` - * - libcxx/include/__support/xlocale - - `3` - - `0` - - `3` - - :none:`0%` - * - libcxx/include/__thread - - `2` - - `0` - - `2` - - :none:`0%` - * - libcxx/include/__utility - - `17` - - `5` - - `12` - - :part:`29%` - * - libcxx/include/__variant - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/src - - `42` - - `6` - - `36` - - :part:`14%` - * - libcxx/src/experimental - - `2` - - `1` - - `1` - - :part:`50%` - * - libcxx/src/filesystem - - `5` - - `0` - - `5` - - :none:`0%` - * - libcxx/src/include - - `6` - - `1` - - `5` - - :part:`16%` - * - libcxx/src/include/ryu - - `9` - - `8` - - `1` - - :part:`88%` - * - libcxx/src/ryu - - `3` - - `3` - - `0` - - :good:`100%` - * - libcxx/src/support/ibm - - `3` - - `0` - - `3` - - :none:`0%` - * - libcxx/src/support/solaris - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxx/src/support/win32 - - `3` - - `0` - - `3` - - :none:`0%` - * - libcxxabi/fuzz - - `1` - - `0` - - `1` - - :none:`0%` - * - libcxxabi/include - - `2` - - `0` - - `2` - - :none:`0%` - * - libcxxabi/src - - `25` - - `1` - - `24` - - :part:`4%` - * - libcxxabi/src/demangle - - `4` - - `2` - - `2` - - :part:`50%` - * - libunwind/include - - `5` - - `0` - - `5` - - :none:`0%` - * - libunwind/include/mach-o - - `1` - - `0` - - `1` - - :none:`0%` - * - libunwind/src - - `10` - - `1` - - `9` - - :part:`10%` - * - lld/COFF - - `37` - - `13` - - `24` - - :part:`35%` - * - lld/Common - - `11` - - `9` - - `2` - - :part:`81%` - * - lld/ELF - - `48` - - `25` - - `23` - - :part:`52%` - * - lld/ELF/Arch - - `14` - - `4` - - `10` - - :part:`28%` - * - lld/include/lld/Common - - `14` - - `8` - - `6` - - :part:`57%` - * - lld/include/lld/Core - - `20` - - `4` - - `16` - - :part:`20%` - * - lld/MachO - - `45` - - `43` - - `2` - - :part:`95%` - * - lld/MachO/Arch - - `6` - - `6` - - `0` - - :good:`100%` - * - lld/MinGW - - `1` - - `1` - - `0` - - :good:`100%` - * - lld/tools/lld - - `1` - - `1` - - `0` - - :good:`100%` - * - lld/wasm - - `29` - - `15` - - `14` - - :part:`51%` - * - lldb/bindings/python - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/examples/darwin/heap_find/heap - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/examples/functions - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/examples/interposing/darwin/fd_interposing - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/examples/lookup - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/examples/plugins/commands - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/examples/synthetic/bitfield - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/include/lldb - - `12` - - `6` - - `6` - - :part:`50%` - * - lldb/include/lldb/API - - `70` - - `60` - - `10` - - :part:`85%` - * - lldb/include/lldb/Breakpoint - - `25` - - `9` - - `16` - - :part:`36%` - * - lldb/include/lldb/Core - - `61` - - `31` - - `30` - - :part:`50%` - * - lldb/include/lldb/DataFormatters - - `18` - - `10` - - `8` - - :part:`55%` - * - lldb/include/lldb/Expression - - `17` - - `7` - - `10` - - :part:`41%` - * - lldb/include/lldb/Host - - `39` - - `20` - - `19` - - :part:`51%` - * - lldb/include/lldb/Host/android - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/include/lldb/Host/common - - `8` - - `2` - - `6` - - :part:`25%` - * - lldb/include/lldb/Host/freebsd - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/include/lldb/Host/linux - - `6` - - `4` - - `2` - - :part:`66%` - * - lldb/include/lldb/Host/macosx - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/include/lldb/Host/netbsd - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/include/lldb/Host/openbsd - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/include/lldb/Host/posix - - `9` - - `7` - - `2` - - :part:`77%` - * - lldb/include/lldb/Host/windows - - `10` - - `4` - - `6` - - :part:`40%` - * - lldb/include/lldb/Initialization - - `3` - - `1` - - `2` - - :part:`33%` - * - lldb/include/lldb/Interpreter - - `49` - - `36` - - `13` - - :part:`73%` - * - lldb/include/lldb/Symbol - - `35` - - `14` - - `21` - - :part:`40%` - * - lldb/include/lldb/Target - - `78` - - `51` - - `27` - - :part:`65%` - * - lldb/include/lldb/Utility - - `63` - - `41` - - `22` - - :part:`65%` - * - lldb/include/lldb/Version - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/source/API - - `73` - - `36` - - `37` - - :part:`49%` - * - lldb/source/Breakpoint - - `24` - - `6` - - `18` - - :part:`25%` - * - lldb/source/Commands - - `70` - - `57` - - `13` - - :part:`81%` - * - lldb/source/Core - - `49` - - `26` - - `23` - - :part:`53%` - * - lldb/source/DataFormatters - - `16` - - `3` - - `13` - - :part:`18%` - * - lldb/source/Expression - - `13` - - `5` - - `8` - - :part:`38%` - * - lldb/source/Host/android - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Host/common - - `31` - - `16` - - `15` - - :part:`51%` - * - lldb/source/Host/freebsd - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Host/linux - - `5` - - `5` - - `0` - - :good:`100%` - * - lldb/source/Host/macosx/cfcpp - - `14` - - `12` - - `2` - - :part:`85%` - * - lldb/source/Host/macosx/objcxx - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/source/Host/netbsd - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Host/openbsd - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Host/posix - - `9` - - `6` - - `3` - - :part:`66%` - * - lldb/source/Host/windows - - `11` - - `7` - - `4` - - :part:`63%` - * - lldb/source/Initialization - - `3` - - `3` - - `0` - - :good:`100%` - * - lldb/source/Interpreter - - `44` - - `24` - - `20` - - :part:`54%` - * - lldb/source/Plugins/ABI/AArch64 - - `6` - - `3` - - `3` - - :part:`50%` - * - lldb/source/Plugins/ABI/ARC - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ABI/ARM - - `6` - - `2` - - `4` - - :part:`33%` - * - lldb/source/Plugins/ABI/Hexagon - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ABI/Mips - - `6` - - `2` - - `4` - - :part:`33%` - * - lldb/source/Plugins/ABI/PowerPC - - `6` - - `3` - - `3` - - :part:`50%` - * - lldb/source/Plugins/ABI/SystemZ - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ABI/X86 - - `13` - - `4` - - `9` - - :part:`30%` - * - lldb/source/Plugins/Architecture/AArch64 - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Architecture/Arm - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Architecture/Mips - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/Architecture/PPC64 - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Disassembler/LLVMC - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/DynamicLoader/Darwin-Kernel - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/DynamicLoader/Hexagon-DYLD - - `4` - - `3` - - `1` - - :part:`75%` - * - lldb/source/Plugins/DynamicLoader/MacOSX-DYLD - - `6` - - `3` - - `3` - - :part:`50%` - * - lldb/source/Plugins/DynamicLoader/POSIX-DYLD - - `4` - - `2` - - `2` - - :part:`50%` - * - lldb/source/Plugins/DynamicLoader/Static - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/DynamicLoader/wasm-DYLD - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/DynamicLoader/Windows-DYLD - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/ExpressionParser/Clang - - `51` - - `25` - - `26` - - :part:`49%` - * - lldb/source/Plugins/Instruction/ARM - - `4` - - `2` - - `2` - - :part:`50%` - * - lldb/source/Plugins/Instruction/ARM64 - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/Instruction/MIPS - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/Instruction/MIPS64 - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Instruction/PPC64 - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/InstrumentationRuntime/ASan - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/InstrumentationRuntime/TSan - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/InstrumentationRuntime/UBSan - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/JITLoader/GDB - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Language/ClangCommon - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Language/CPlusPlus - - `30` - - `19` - - `11` - - :part:`63%` - * - lldb/source/Plugins/Language/ObjC - - `21` - - `14` - - `7` - - :part:`66%` - * - lldb/source/Plugins/Language/ObjCPlusPlus - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/LanguageRuntime/CPlusPlus - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/LanguageRuntime/CPlusPlus/ItaniumABI - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/LanguageRuntime/ObjC - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime - - `16` - - `5` - - `11` - - :part:`31%` - * - lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime - - `8` - - `3` - - `5` - - :part:`37%` - * - lldb/source/Plugins/MemoryHistory/asan - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ObjectContainer/BSD-Archive - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ObjectContainer/Universal-Mach-O - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ObjectFile/Breakpad - - `4` - - `3` - - `1` - - :part:`75%` - * - lldb/source/Plugins/ObjectFile/ELF - - `4` - - `1` - - `3` - - :part:`25%` - * - lldb/source/Plugins/ObjectFile/JIT - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ObjectFile/Mach-O - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/ObjectFile/Minidump - - `4` - - `4` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ObjectFile/PDB - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ObjectFile/PECOFF - - `6` - - `3` - - `3` - - :part:`50%` - * - lldb/source/Plugins/ObjectFile/wasm - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/OperatingSystem/Python - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Platform/Android - - `6` - - `3` - - `3` - - :part:`50%` - * - lldb/source/Plugins/Platform/FreeBSD - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Platform/gdb-server - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Platform/Linux - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Platform/MacOSX - - `20` - - `11` - - `9` - - :part:`55%` - * - lldb/source/Plugins/Platform/MacOSX/objcxx - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Platform/NetBSD - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Platform/OpenBSD - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Platform/POSIX - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/Platform/QemuUser - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Platform/Windows - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Process/elf-core - - `20` - - `18` - - `2` - - :part:`90%` - * - lldb/source/Plugins/Process/FreeBSD - - `16` - - `12` - - `4` - - :part:`75%` - * - lldb/source/Plugins/Process/FreeBSDKernel - - `10` - - `8` - - `2` - - :part:`80%` - * - lldb/source/Plugins/Process/gdb-remote - - `26` - - `15` - - `11` - - :part:`57%` - * - lldb/source/Plugins/Process/Linux - - `21` - - `11` - - `10` - - :part:`52%` - * - lldb/source/Plugins/Process/mach-core - - `4` - - `3` - - `1` - - :part:`75%` - * - lldb/source/Plugins/Process/MacOSX-Kernel - - `16` - - `13` - - `3` - - :part:`81%` - * - lldb/source/Plugins/Process/minidump - - `17` - - `10` - - `7` - - :part:`58%` - * - lldb/source/Plugins/Process/NetBSD - - `8` - - `4` - - `4` - - :part:`50%` - * - lldb/source/Plugins/Process/POSIX - - `8` - - `7` - - `1` - - :part:`87%` - * - lldb/source/Plugins/Process/scripted - - `4` - - `4` - - `0` - - :good:`100%` - * - lldb/source/Plugins/Process/Utility - - `132` - - `97` - - `35` - - :part:`73%` - * - lldb/source/Plugins/Process/Windows/Common - - `34` - - `22` - - `12` - - :part:`64%` - * - lldb/source/Plugins/Process/Windows/Common/arm - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Process/Windows/Common/arm64 - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/Process/Windows/Common/x64 - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/Process/Windows/Common/x86 - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/REPL/Clang - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/ScriptInterpreter/Lua - - `5` - - `5` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ScriptInterpreter/None - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/ScriptInterpreter/Python - - `16` - - `12` - - `4` - - :part:`75%` - * - lldb/source/Plugins/StructuredData/DarwinLog - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/SymbolFile/Breakpad - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/SymbolFile/DWARF - - `65` - - `39` - - `26` - - :part:`60%` - * - lldb/source/Plugins/SymbolFile/NativePDB - - `20` - - `10` - - `10` - - :part:`50%` - * - lldb/source/Plugins/SymbolFile/PDB - - `6` - - `4` - - `2` - - :part:`66%` - * - lldb/source/Plugins/SymbolFile/Symtab - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/SymbolVendor/ELF - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/SymbolVendor/MacOSX - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/SymbolVendor/wasm - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/SystemRuntime/MacOSX - - `10` - - `1` - - `9` - - :part:`10%` - * - lldb/source/Plugins/Trace/common - - `8` - - `7` - - `1` - - :part:`87%` - * - lldb/source/Plugins/Trace/intel-pt - - `18` - - `17` - - `1` - - :part:`94%` - * - lldb/source/Plugins/TraceExporter/common - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/source/Plugins/TraceExporter/ctf - - `4` - - `3` - - `1` - - :part:`75%` - * - lldb/source/Plugins/TypeSystem/Clang - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/source/Plugins/UnwindAssembly/InstEmulation - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/source/Plugins/UnwindAssembly/x86 - - `4` - - `2` - - `2` - - :part:`50%` - * - lldb/source/Symbol - - `31` - - `18` - - `13` - - :part:`58%` - * - lldb/source/Target - - `69` - - `34` - - `35` - - :part:`49%` - * - lldb/source/Utility - - `58` - - `46` - - `12` - - :part:`79%` - * - lldb/source/Version - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/tools/argdumper - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/tools/darwin-debug - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/tools/debugserver/source - - `51` - - `40` - - `11` - - :part:`78%` - * - lldb/tools/debugserver/source/MacOSX - - `24` - - `16` - - `8` - - :part:`66%` - * - lldb/tools/debugserver/source/MacOSX/arm - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/tools/debugserver/source/MacOSX/arm64 - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/tools/debugserver/source/MacOSX/i386 - - `3` - - `0` - - `3` - - :none:`0%` - * - lldb/tools/debugserver/source/MacOSX/x86_64 - - `3` - - `0` - - `3` - - :none:`0%` - * - lldb/tools/driver - - `4` - - `4` - - `0` - - :good:`100%` - * - lldb/tools/intel-features - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/tools/intel-features/intel-mpx - - `2` - - `1` - - `1` - - :part:`50%` - * - lldb/tools/lldb-instr - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/tools/lldb-server - - `9` - - `4` - - `5` - - :part:`44%` - * - lldb/tools/lldb-test - - `5` - - `2` - - `3` - - :part:`40%` - * - lldb/tools/lldb-vscode - - `27` - - `24` - - `3` - - :part:`88%` - * - lldb/unittests - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/API - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/unittests/Breakpoint - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Core - - `10` - - `9` - - `1` - - :part:`90%` - * - lldb/unittests/DataFormatter - - `3` - - `3` - - `0` - - :good:`100%` - * - lldb/unittests/debugserver - - `3` - - `2` - - `1` - - :part:`66%` - * - lldb/unittests/Disassembler - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/unittests/Editline - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Expression - - `5` - - `3` - - `2` - - :part:`60%` - * - lldb/unittests/Host - - `16` - - `11` - - `5` - - :part:`68%` - * - lldb/unittests/Host/linux - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/unittests/Host/posix - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Instruction - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Interpreter - - `6` - - `2` - - `4` - - :part:`33%` - * - lldb/unittests/Language/CLanguages - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Language/CPlusPlus - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Language/Highlighting - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/ObjectFile/Breakpad - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/ObjectFile/ELF - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/ObjectFile/MachO - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/ObjectFile/PECOFF - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Platform - - `3` - - `2` - - `1` - - :part:`66%` - * - lldb/unittests/Platform/Android - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Process - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Process/gdb-remote - - `8` - - `6` - - `2` - - :part:`75%` - * - lldb/unittests/Process/Linux - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Process/minidump - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/unittests/Process/minidump/Inputs - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Process/POSIX - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Process/Utility - - `6` - - `4` - - `2` - - :part:`66%` - * - lldb/unittests/ScriptInterpreter/Lua - - `2` - - `2` - - `0` - - :good:`100%` - * - lldb/unittests/ScriptInterpreter/Python - - `3` - - `2` - - `1` - - :part:`66%` - * - lldb/unittests/Signals - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/Symbol - - `11` - - `7` - - `4` - - :part:`63%` - * - lldb/unittests/SymbolFile/DWARF - - `6` - - `4` - - `2` - - :part:`66%` - * - lldb/unittests/SymbolFile/DWARF/Inputs - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/SymbolFile/NativePDB - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/SymbolFile/PDB - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/SymbolFile/PDB/Inputs - - `5` - - `5` - - `0` - - :good:`100%` - * - lldb/unittests/Target - - `10` - - `6` - - `4` - - :part:`60%` - * - lldb/unittests/TestingSupport - - `5` - - `4` - - `1` - - :part:`80%` - * - lldb/unittests/TestingSupport/Host - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/TestingSupport/Symbol - - `3` - - `3` - - `0` - - :good:`100%` - * - lldb/unittests/Thread - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/tools/lldb-server/inferior - - `2` - - `0` - - `2` - - :none:`0%` - * - lldb/unittests/tools/lldb-server/tests - - `7` - - `0` - - `7` - - :none:`0%` - * - lldb/unittests/UnwindAssembly/ARM64 - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/UnwindAssembly/PPC64 - - `1` - - `1` - - `0` - - :good:`100%` - * - lldb/unittests/UnwindAssembly/x86 - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/unittests/Utility - - `45` - - `32` - - `13` - - :part:`71%` - * - lldb/utils/lit-cpuid - - `1` - - `0` - - `1` - - :none:`0%` - * - lldb/utils/TableGen - - `6` - - `6` - - `0` - - :good:`100%` - * - llvm/benchmarks - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/bindings/go/llvm - - `6` - - `3` - - `3` - - :part:`50%` - * - llvm/bindings/ocaml/llvm - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/cmake - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/examples/BrainF - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/examples/Bye - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/ExceptionDemo - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Fibonacci - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/HowToUseJIT - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/HowToUseLLJIT - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/IRTransforms - - `4` - - `4` - - `0` - - :good:`100%` - * - llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1 - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2 - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3 - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/examples/Kaleidoscope/BuildingAJIT/Chapter4 - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter2 - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/Kaleidoscope/Chapter3 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter4 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter5 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter6 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter7 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter8 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/Chapter9 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/include - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/Kaleidoscope/MCJIT/cached - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/examples/Kaleidoscope/MCJIT/complete - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/MCJIT/initial - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/Kaleidoscope/MCJIT/lazy - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/examples/ModuleMaker - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/OrcV2Examples - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITDumpObjects - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithCustomObjectLinkingLayer - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithInitializers - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithLazyReexports - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithObjectCache - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithObjectLinkingLayerPlugin - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/examples/OrcV2Examples/LLJITWithThinLTOSummaries - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/ParallelJIT - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/examples/SpeculativeJIT - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm - - `8` - - `2` - - `6` - - :part:`25%` - * - llvm/include/llvm/ADT - - `93` - - `25` - - `68` - - :part:`26%` - * - llvm/include/llvm/Analysis - - `130` - - `52` - - `78` - - :part:`40%` - * - llvm/include/llvm/Analysis/Utils - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/include/llvm/AsmParser - - `5` - - `2` - - `3` - - :part:`40%` - * - llvm/include/llvm/BinaryFormat - - `15` - - `8` - - `7` - - :part:`53%` - * - llvm/include/llvm/Bitcode - - `7` - - `2` - - `5` - - :part:`28%` - * - llvm/include/llvm/Bitstream - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/include/llvm/CodeGen - - `158` - - `51` - - `107` - - :part:`32%` - * - llvm/include/llvm/CodeGen/GlobalISel - - `27` - - `8` - - `19` - - :part:`29%` - * - llvm/include/llvm/CodeGen/MIRParser - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/include/llvm/CodeGen/PBQP - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/include/llvm/DebugInfo - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/DebugInfo/CodeView - - `57` - - `40` - - `17` - - :part:`70%` - * - llvm/include/llvm/DebugInfo/DWARF - - `32` - - `14` - - `18` - - :part:`43%` - * - llvm/include/llvm/DebugInfo/GSYM - - `14` - - `4` - - `10` - - :part:`28%` - * - llvm/include/llvm/DebugInfo/MSF - - `5` - - `4` - - `1` - - :part:`80%` - * - llvm/include/llvm/DebugInfo/PDB - - `50` - - `30` - - `20` - - :part:`60%` - * - llvm/include/llvm/DebugInfo/PDB/DIA - - `20` - - `9` - - `11` - - :part:`45%` - * - llvm/include/llvm/DebugInfo/PDB/Native - - `54` - - `35` - - `19` - - :part:`64%` - * - llvm/include/llvm/DebugInfo/Symbolize - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/include/llvm/Debuginfod - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/include/llvm/Demangle - - `7` - - `3` - - `4` - - :part:`42%` - * - llvm/include/llvm/DWARFLinker - - `4` - - `4` - - `0` - - :good:`100%` - * - llvm/include/llvm/DWP - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/include/llvm/ExecutionEngine - - `12` - - `2` - - `10` - - :part:`16%` - * - llvm/include/llvm/ExecutionEngine/JITLink - - `16` - - `14` - - `2` - - :part:`87%` - * - llvm/include/llvm/ExecutionEngine/Orc - - `38` - - `29` - - `9` - - :part:`76%` - * - llvm/include/llvm/ExecutionEngine/Orc/Shared - - `8` - - `4` - - `4` - - :part:`50%` - * - llvm/include/llvm/ExecutionEngine/Orc/TargetProcess - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/include/llvm/FileCheck - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/Frontend/OpenMP - - `5` - - `4` - - `1` - - :part:`80%` - * - llvm/include/llvm/FuzzMutate - - `6` - - `0` - - `6` - - :none:`0%` - * - llvm/include/llvm/InterfaceStub - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/include/llvm/IR - - `93` - - `28` - - `65` - - :part:`30%` - * - llvm/include/llvm/IRReader - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm/LineEditor - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm/Linker - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/include/llvm/LTO - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/include/llvm/LTO/legacy - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/include/llvm/MC - - `74` - - `24` - - `50` - - :part:`32%` - * - llvm/include/llvm/MC/MCDisassembler - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/include/llvm/MC/MCParser - - `8` - - `3` - - `5` - - :part:`37%` - * - llvm/include/llvm/MCA - - `10` - - `10` - - `0` - - :good:`100%` - * - llvm/include/llvm/MCA/HardwareUnits - - `6` - - `4` - - `2` - - :part:`66%` - * - llvm/include/llvm/MCA/Stages - - `8` - - `8` - - `0` - - :good:`100%` - * - llvm/include/llvm/ObjCopy - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/include/llvm/ObjCopy/COFF - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/include/llvm/ObjCopy/ELF - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/include/llvm/ObjCopy/MachO - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/include/llvm/ObjCopy/wasm - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/include/llvm/ObjCopy/XCOFF - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/include/llvm/Object - - `31` - - `12` - - `19` - - :part:`38%` - * - llvm/include/llvm/ObjectYAML - - `16` - - `12` - - `4` - - :part:`75%` - * - llvm/include/llvm/Option - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/include/llvm/Passes - - `4` - - `2` - - `2` - - :part:`50%` - * - llvm/include/llvm/ProfileData - - `11` - - `5` - - `6` - - :part:`45%` - * - llvm/include/llvm/ProfileData/Coverage - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/include/llvm/Remarks - - `12` - - `11` - - `1` - - :part:`91%` - * - llvm/include/llvm/Support - - `186` - - `68` - - `118` - - :part:`36%` - * - llvm/include/llvm/Support/FileSystem - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/Support/Solaris/sys - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/Support/Windows - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm/TableGen - - `9` - - `3` - - `6` - - :part:`33%` - * - llvm/include/llvm/Target - - `6` - - `2` - - `4` - - :part:`33%` - * - llvm/include/llvm/Testing/Support - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/include/llvm/TextAPI - - `9` - - `9` - - `0` - - :good:`100%` - * - llvm/include/llvm/ToolDrivers/llvm-dlltool - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/ToolDrivers/llvm-lib - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm/Transforms - - `8` - - `2` - - `6` - - :part:`25%` - * - llvm/include/llvm/Transforms/AggressiveInstCombine - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/include/llvm/Transforms/Coroutines - - `4` - - `4` - - `0` - - :good:`100%` - * - llvm/include/llvm/Transforms/InstCombine - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/include/llvm/Transforms/Instrumentation - - `17` - - `10` - - `7` - - :part:`58%` - * - llvm/include/llvm/Transforms/IPO - - `38` - - `28` - - `10` - - :part:`73%` - * - llvm/include/llvm/Transforms/Scalar - - `75` - - `47` - - `28` - - :part:`62%` - * - llvm/include/llvm/Transforms/Utils - - `74` - - `44` - - `30` - - :part:`59%` - * - llvm/include/llvm/Transforms/Vectorize - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/include/llvm/WindowsDriver - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/include/llvm/WindowsManifest - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/include/llvm/WindowsResource - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/include/llvm/XRay - - `17` - - `13` - - `4` - - :part:`76%` - * - llvm/include/llvm-c - - `27` - - `12` - - `15` - - :part:`44%` - * - llvm/include/llvm-c/Transforms - - `9` - - `3` - - `6` - - :part:`33%` - * - llvm/lib/Analysis - - `119` - - `40` - - `79` - - :part:`33%` - * - llvm/lib/AsmParser - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/lib/BinaryFormat - - `13` - - `10` - - `3` - - :part:`76%` - * - llvm/lib/Bitcode/Reader - - `7` - - `2` - - `5` - - :part:`28%` - * - llvm/lib/Bitcode/Writer - - `5` - - `0` - - `5` - - :none:`0%` - * - llvm/lib/Bitstream/Reader - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/CodeGen - - `220` - - `60` - - `160` - - :part:`27%` - * - llvm/lib/CodeGen/AsmPrinter - - `45` - - `18` - - `27` - - :part:`40%` - * - llvm/lib/CodeGen/GlobalISel - - `24` - - `9` - - `15` - - :part:`37%` - * - llvm/lib/CodeGen/LiveDebugValues - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/lib/CodeGen/MIRParser - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/lib/CodeGen/SelectionDAG - - `31` - - `2` - - `29` - - :part:`6%` - * - llvm/lib/DebugInfo/CodeView - - `40` - - `23` - - `17` - - :part:`57%` - * - llvm/lib/DebugInfo/DWARF - - `28` - - `9` - - `19` - - :part:`32%` - * - llvm/lib/DebugInfo/GSYM - - `11` - - `2` - - `9` - - :part:`18%` - * - llvm/lib/DebugInfo/MSF - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/lib/DebugInfo/PDB - - `40` - - `35` - - `5` - - :part:`87%` - * - llvm/lib/DebugInfo/PDB/DIA - - `18` - - `15` - - `3` - - :part:`83%` - * - llvm/lib/DebugInfo/PDB/Native - - `50` - - `37` - - `13` - - :part:`74%` - * - llvm/lib/DebugInfo/Symbolize - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/lib/Debuginfod - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/lib/Demangle - - `6` - - `4` - - `2` - - :part:`66%` - * - llvm/lib/DWARFLinker - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/lib/DWP - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/ExecutionEngine - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/lib/ExecutionEngine/IntelJITEvents - - `5` - - `0` - - `5` - - :none:`0%` - * - llvm/lib/ExecutionEngine/Interpreter - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/lib/ExecutionEngine/JITLink - - `23` - - `15` - - `8` - - :part:`65%` - * - llvm/lib/ExecutionEngine/MCJIT - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/ExecutionEngine/OProfileJIT - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/ExecutionEngine/Orc - - `37` - - `22` - - `15` - - :part:`59%` - * - llvm/lib/ExecutionEngine/Orc/Shared - - `4` - - `4` - - `0` - - :good:`100%` - * - llvm/lib/ExecutionEngine/Orc/TargetProcess - - `8` - - `7` - - `1` - - :part:`87%` - * - llvm/lib/ExecutionEngine/PerfJITEvents - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/ExecutionEngine/RuntimeDyld - - `12` - - `1` - - `11` - - :part:`8%` - * - llvm/lib/ExecutionEngine/RuntimeDyld/Targets - - `10` - - `1` - - `9` - - :part:`10%` - * - llvm/lib/Extensions - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/FileCheck - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Frontend/OpenACC - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Frontend/OpenMP - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/lib/FuzzMutate - - `5` - - `2` - - `3` - - :part:`40%` - * - llvm/lib/InterfaceStub - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/lib/IR - - `69` - - `20` - - `49` - - :part:`28%` - * - llvm/lib/IRReader - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/LineEditor - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Linker - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/lib/LTO - - `7` - - `1` - - `6` - - :part:`14%` - * - llvm/lib/MC - - `65` - - `21` - - `44` - - :part:`32%` - * - llvm/lib/MC/MCDisassembler - - `6` - - `3` - - `3` - - :part:`50%` - * - llvm/lib/MC/MCParser - - `14` - - `3` - - `11` - - :part:`21%` - * - llvm/lib/MCA - - `9` - - `8` - - `1` - - :part:`88%` - * - llvm/lib/MCA/HardwareUnits - - `6` - - `4` - - `2` - - :part:`66%` - * - llvm/lib/MCA/Stages - - `8` - - `7` - - `1` - - :part:`87%` - * - llvm/lib/ObjCopy - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/lib/ObjCopy/COFF - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/lib/ObjCopy/ELF - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/lib/ObjCopy/MachO - - `9` - - `9` - - `0` - - :good:`100%` - * - llvm/lib/ObjCopy/wasm - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/lib/ObjCopy/XCOFF - - `6` - - `3` - - `3` - - :part:`50%` - * - llvm/lib/Object - - `31` - - `16` - - `15` - - :part:`51%` - * - llvm/lib/ObjectYAML - - `23` - - `9` - - `14` - - :part:`39%` - * - llvm/lib/Option - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/lib/Passes - - `6` - - `3` - - `3` - - :part:`50%` - * - llvm/lib/ProfileData - - `11` - - `4` - - `7` - - :part:`36%` - * - llvm/lib/ProfileData/Coverage - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/lib/Remarks - - `13` - - `10` - - `3` - - :part:`76%` - * - llvm/lib/Support - - `144` - - `61` - - `83` - - :part:`42%` - * - llvm/lib/Support/Unix - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/TableGen - - `15` - - `3` - - `12` - - :part:`20%` - * - llvm/lib/Target - - `5` - - `1` - - `4` - - :part:`20%` - * - llvm/lib/Target/AArch64 - - `60` - - `7` - - `53` - - :part:`11%` - * - llvm/lib/Target/AArch64/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/AArch64/Disassembler - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/lib/Target/AArch64/GISel - - `14` - - `3` - - `11` - - :part:`21%` - * - llvm/lib/Target/AArch64/MCTargetDesc - - `21` - - `6` - - `15` - - :part:`28%` - * - llvm/lib/Target/AArch64/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/AArch64/Utils - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/Target/AMDGPU - - `169` - - `38` - - `131` - - :part:`22%` - * - llvm/lib/Target/AMDGPU/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/AMDGPU/Disassembler - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/Target/AMDGPU/MCA - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/AMDGPU/MCTargetDesc - - `21` - - `5` - - `16` - - :part:`23%` - * - llvm/lib/Target/AMDGPU/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/AMDGPU/Utils - - `11` - - `4` - - `7` - - :part:`36%` - * - llvm/lib/Target/ARC - - `24` - - `19` - - `5` - - :part:`79%` - * - llvm/lib/Target/ARC/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/ARC/MCTargetDesc - - `7` - - `6` - - `1` - - :part:`85%` - * - llvm/lib/Target/ARC/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/ARM - - `76` - - `10` - - `66` - - :part:`13%` - * - llvm/lib/Target/ARM/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/ARM/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/ARM/MCTargetDesc - - `26` - - `2` - - `24` - - :part:`7%` - * - llvm/lib/Target/ARM/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/ARM/Utils - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/Target/AVR - - `24` - - `23` - - `1` - - :part:`95%` - * - llvm/lib/Target/AVR/AsmParser - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/AVR/Disassembler - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/AVR/MCTargetDesc - - `20` - - `18` - - `2` - - :part:`90%` - * - llvm/lib/Target/AVR/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/BPF - - `32` - - `9` - - `23` - - :part:`28%` - * - llvm/lib/Target/BPF/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/BPF/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/BPF/MCTargetDesc - - `8` - - `1` - - `7` - - :part:`12%` - * - llvm/lib/Target/BPF/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/CSKY - - `23` - - `23` - - `0` - - :good:`100%` - * - llvm/lib/Target/CSKY/AsmParser - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/CSKY/Disassembler - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/CSKY/MCTargetDesc - - `15` - - `14` - - `1` - - :part:`93%` - * - llvm/lib/Target/CSKY/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/Hexagon - - `80` - - `6` - - `74` - - :part:`7%` - * - llvm/lib/Target/Hexagon/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Hexagon/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Hexagon/MCTargetDesc - - `26` - - `6` - - `20` - - :part:`23%` - * - llvm/lib/Target/Hexagon/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/Lanai - - `28` - - `20` - - `8` - - :part:`71%` - * - llvm/lib/Target/Lanai/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Lanai/Disassembler - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/Lanai/MCTargetDesc - - `13` - - `12` - - `1` - - :part:`92%` - * - llvm/lib/Target/Lanai/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/LoongArch - - `19` - - `19` - - `0` - - :good:`100%` - * - llvm/lib/Target/LoongArch/MCTargetDesc - - `12` - - `12` - - `0` - - :good:`100%` - * - llvm/lib/Target/LoongArch/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/M68k - - `26` - - `25` - - `1` - - :part:`96%` - * - llvm/lib/Target/M68k/AsmParser - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/M68k/Disassembler - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/M68k/GISel - - `7` - - `6` - - `1` - - :part:`85%` - * - llvm/lib/Target/M68k/MCTargetDesc - - `12` - - `11` - - `1` - - :part:`91%` - * - llvm/lib/Target/M68k/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/Mips - - `70` - - `12` - - `58` - - :part:`17%` - * - llvm/lib/Target/Mips/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Mips/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Mips/MCTargetDesc - - `25` - - `6` - - `19` - - :part:`24%` - * - llvm/lib/Target/Mips/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/MSP430 - - `20` - - `0` - - `20` - - :none:`0%` - * - llvm/lib/Target/MSP430/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/MSP430/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/MSP430/MCTargetDesc - - `11` - - `3` - - `8` - - :part:`27%` - * - llvm/lib/Target/MSP430/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/NVPTX - - `44` - - `10` - - `34` - - :part:`22%` - * - llvm/lib/Target/NVPTX/MCTargetDesc - - `9` - - `6` - - `3` - - :part:`66%` - * - llvm/lib/Target/NVPTX/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/PowerPC - - `54` - - `5` - - `49` - - :part:`9%` - * - llvm/lib/Target/PowerPC/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/PowerPC/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/PowerPC/GISel - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/lib/Target/PowerPC/MCTargetDesc - - `20` - - `5` - - `15` - - :part:`25%` - * - llvm/lib/Target/PowerPC/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/RISCV - - `36` - - `17` - - `19` - - :part:`47%` - * - llvm/lib/Target/RISCV/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/RISCV/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/RISCV/MCTargetDesc - - `23` - - `13` - - `10` - - :part:`56%` - * - llvm/lib/Target/RISCV/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/Sparc - - `23` - - `3` - - `20` - - :part:`13%` - * - llvm/lib/Target/Sparc/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Sparc/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/Sparc/MCTargetDesc - - `14` - - `4` - - `10` - - :part:`28%` - * - llvm/lib/Target/Sparc/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/SystemZ - - `41` - - `6` - - `35` - - :part:`14%` - * - llvm/lib/Target/SystemZ/AsmParser - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/SystemZ/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/SystemZ/MCTargetDesc - - `10` - - `4` - - `6` - - :part:`40%` - * - llvm/lib/Target/SystemZ/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/VE - - `24` - - `19` - - `5` - - :part:`79%` - * - llvm/lib/Target/VE/AsmParser - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/VE/Disassembler - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/VE/MCTargetDesc - - `14` - - `14` - - `0` - - :good:`100%` - * - llvm/lib/Target/VE/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/WebAssembly - - `61` - - `44` - - `17` - - :part:`72%` - * - llvm/lib/Target/WebAssembly/AsmParser - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/lib/Target/WebAssembly/Disassembler - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Target/WebAssembly/MCTargetDesc - - `12` - - `8` - - `4` - - :part:`66%` - * - llvm/lib/Target/WebAssembly/TargetInfo - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/WebAssembly/Utils - - `4` - - `4` - - `0` - - :good:`100%` - * - llvm/lib/Target/X86 - - `82` - - `19` - - `63` - - :part:`23%` - * - llvm/lib/Target/X86/AsmParser - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/lib/Target/X86/Disassembler - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/lib/Target/X86/MCA - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/lib/Target/X86/MCTargetDesc - - `25` - - `5` - - `20` - - :part:`20%` - * - llvm/lib/Target/X86/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Target/XCore - - `27` - - `2` - - `25` - - :part:`7%` - * - llvm/lib/Target/XCore/Disassembler - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Target/XCore/MCTargetDesc - - `6` - - `3` - - `3` - - :part:`50%` - * - llvm/lib/Target/XCore/TargetInfo - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/lib/Testing/Support - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/lib/TextAPI - - `11` - - `9` - - `2` - - :part:`81%` - * - llvm/lib/ToolDrivers/llvm-dlltool - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/ToolDrivers/llvm-lib - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Transforms/AggressiveInstCombine - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/lib/Transforms/CFGuard - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/Transforms/Coroutines - - `8` - - `0` - - `8` - - :none:`0%` - * - llvm/lib/Transforms/Hello - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/lib/Transforms/InstCombine - - `16` - - `1` - - `15` - - :part:`6%` - * - llvm/lib/Transforms/Instrumentation - - `21` - - `7` - - `14` - - :part:`33%` - * - llvm/lib/Transforms/IPO - - `44` - - `9` - - `35` - - :part:`20%` - * - llvm/lib/Transforms/ObjCARC - - `15` - - `4` - - `11` - - :part:`26%` - * - llvm/lib/Transforms/Scalar - - `79` - - `16` - - `63` - - :part:`20%` - * - llvm/lib/Transforms/Utils - - `78` - - `19` - - `59` - - :part:`24%` - * - llvm/lib/Transforms/Vectorize - - `22` - - `13` - - `9` - - :part:`59%` - * - llvm/lib/WindowsDriver - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/WindowsManifest - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/lib/XRay - - `14` - - `11` - - `3` - - :part:`78%` - * - llvm/tools/bugpoint - - `12` - - `1` - - `11` - - :part:`8%` - * - llvm/tools/bugpoint-passes - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/dsymutil - - `18` - - `16` - - `2` - - :part:`88%` - * - llvm/tools/gold - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llc - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/lli - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/tools/lli/ChildTarget - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-ar - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-as - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-as-fuzzer - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-bcanalyzer - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-c-test - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/tools/llvm-cat - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-cfi-verify - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-cfi-verify/lib - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/tools/llvm-config - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-cov - - `23` - - `12` - - `11` - - :part:`52%` - * - llvm/tools/llvm-cvtres - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-cxxdump - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/tools/llvm-cxxfilt - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-cxxmap - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-debuginfod-find - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-diff - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-diff/lib - - `6` - - `0` - - `6` - - :none:`0%` - * - llvm/tools/llvm-dis - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-dis-fuzzer - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-dlang-demangle-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/llvm-dwarfdump - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/tools/llvm-dwarfdump/fuzzer - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-dwp - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-exegesis - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-exegesis/lib - - `44` - - `33` - - `11` - - :part:`75%` - * - llvm/tools/llvm-exegesis/lib/AArch64 - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-exegesis/lib/Mips - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-exegesis/lib/PowerPC - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-exegesis/lib/X86 - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/tools/llvm-extract - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-gsymutil - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-ifs - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/tools/llvm-isel-fuzzer - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/tools/llvm-itanium-demangle-fuzzer - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/tools/llvm-jitlink - - `4` - - `2` - - `2` - - :part:`50%` - * - llvm/tools/llvm-jitlink/llvm-jitlink-executor - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-jitlistener - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-libtool-darwin - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-link - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-lipo - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-lto - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-lto2 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-mc - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/tools/llvm-mc-assemble-fuzzer - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-mc-disassemble-fuzzer - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-mca - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/tools/llvm-mca/Views - - `20` - - `19` - - `1` - - :part:`95%` - * - llvm/tools/llvm-microsoft-demangle-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/llvm-ml - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/tools/llvm-modextract - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-mt - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-nm - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-objcopy - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/tools/llvm-objdump - - `15` - - `10` - - `5` - - :part:`66%` - * - llvm/tools/llvm-opt-fuzzer - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/tools/llvm-opt-report - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-pdbutil - - `47` - - `15` - - `32` - - :part:`31%` - * - llvm/tools/llvm-profdata - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-profgen - - `11` - - `6` - - `5` - - :part:`54%` - * - llvm/tools/llvm-rc - - `12` - - `6` - - `6` - - :part:`50%` - * - llvm/tools/llvm-readobj - - `19` - - `3` - - `16` - - :part:`15%` - * - llvm/tools/llvm-reduce - - `7` - - `6` - - `1` - - :part:`85%` - * - llvm/tools/llvm-reduce/deltas - - `40` - - `39` - - `1` - - :part:`97%` - * - llvm/tools/llvm-rtdyld - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-rust-demangle-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/llvm-shlib - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-sim - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-size - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-special-case-list-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/llvm-split - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-stress - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-strings - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-symbolizer - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-tapi-diff - - `3` - - `3` - - `0` - - :good:`100%` - * - llvm/tools/llvm-tli-checker - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/llvm-undname - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/llvm-xray - - `19` - - `15` - - `4` - - :part:`78%` - * - llvm/tools/llvm-yaml-numeric-parser-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/llvm-yaml-parser-fuzzer - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/tools/lto - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/tools/obj2yaml - - `10` - - `5` - - `5` - - :part:`50%` - * - llvm/tools/opt - - `10` - - `3` - - `7` - - :part:`30%` - * - llvm/tools/remarks-shlib - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/sancov - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/sanstats - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/split-file - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/verify-uselistorder - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/tools/vfabi-demangle-fuzzer - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/tools/yaml2obj - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/ADT - - `77` - - `29` - - `48` - - :part:`37%` - * - llvm/unittests/Analysis - - `38` - - `13` - - `25` - - :part:`34%` - * - llvm/unittests/AsmParser - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/BinaryFormat - - `6` - - `5` - - `1` - - :part:`83%` - * - llvm/unittests/Bitcode - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/unittests/Bitstream - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/unittests/CodeGen - - `20` - - `10` - - `10` - - :part:`50%` - * - llvm/unittests/CodeGen/GlobalISel - - `13` - - `2` - - `11` - - :part:`15%` - * - llvm/unittests/DebugInfo/CodeView - - `4` - - `2` - - `2` - - :part:`50%` - * - llvm/unittests/DebugInfo/DWARF - - `17` - - `13` - - `4` - - :part:`76%` - * - llvm/unittests/DebugInfo/GSYM - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/DebugInfo/MSF - - `3` - - `2` - - `1` - - :part:`66%` - * - llvm/unittests/DebugInfo/PDB - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/unittests/DebugInfo/PDB/Inputs - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/Debuginfod - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/unittests/Demangle - - `7` - - `5` - - `2` - - :part:`71%` - * - llvm/unittests/ExecutionEngine - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/ExecutionEngine/JITLink - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/ExecutionEngine/MCJIT - - `7` - - `0` - - `7` - - :none:`0%` - * - llvm/unittests/ExecutionEngine/Orc - - `21` - - `14` - - `7` - - :part:`66%` - * - llvm/unittests/FileCheck - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/Frontend - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/unittests/FuzzMutate - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/unittests/InterfaceStub - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/IR - - `36` - - `6` - - `30` - - :part:`16%` - * - llvm/unittests/LineEditor - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/Linker - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/MC - - `7` - - `4` - - `3` - - :part:`57%` - * - llvm/unittests/MC/AMDGPU - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/MC/SystemZ - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/MI - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/MIR - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/ObjCopy - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/Object - - `9` - - `6` - - `3` - - :part:`66%` - * - llvm/unittests/ObjectYAML - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/unittests/Option - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/unittests/Passes - - `5` - - `5` - - `0` - - :good:`100%` - * - llvm/unittests/ProfileData - - `5` - - `2` - - `3` - - :part:`40%` - * - llvm/unittests/Remarks - - `8` - - `5` - - `3` - - :part:`62%` - * - llvm/unittests/Support - - `100` - - `35` - - `65` - - :part:`35%` - * - llvm/unittests/Support/CommandLineInit - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/Support/DynamicLibrary - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/unittests/TableGen - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/unittests/Target/AArch64 - - `3` - - `1` - - `2` - - :part:`33%` - * - llvm/unittests/Target/AMDGPU - - `2` - - `2` - - `0` - - :good:`100%` - * - llvm/unittests/Target/ARM - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/unittests/Target/PowerPC - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/Target/WebAssembly - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/Target/X86 - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/Testing/Support - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/TextAPI - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/unittests/tools/llvm-cfi-verify - - `2` - - `1` - - `1` - - :part:`50%` - * - llvm/unittests/tools/llvm-exegesis - - `4` - - `3` - - `1` - - :part:`75%` - * - llvm/unittests/tools/llvm-exegesis/AArch64 - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/tools/llvm-exegesis/ARM - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/tools/llvm-exegesis/Common - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/unittests/tools/llvm-exegesis/Mips - - `5` - - `3` - - `2` - - :part:`60%` - * - llvm/unittests/tools/llvm-exegesis/PowerPC - - `4` - - `1` - - `3` - - :part:`25%` - * - llvm/unittests/tools/llvm-exegesis/X86 - - `9` - - `6` - - `3` - - :part:`66%` - * - llvm/unittests/tools/llvm-profgen - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/unittests/Transforms/IPO - - `4` - - `2` - - `2` - - :part:`50%` - * - llvm/unittests/Transforms/Scalar - - `2` - - `0` - - `2` - - :none:`0%` - * - llvm/unittests/Transforms/Utils - - `19` - - `8` - - `11` - - :part:`42%` - * - llvm/unittests/Transforms/Vectorize - - `7` - - `7` - - `0` - - :good:`100%` - * - llvm/unittests/XRay - - `8` - - `7` - - `1` - - :part:`87%` - * - llvm/utils/FileCheck - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/fpcmp - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/KillTheDoctor - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/not - - `1` - - `1` - - `0` - - :good:`100%` - * - llvm/utils/PerfectShuffle - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/TableGen - - `78` - - `13` - - `65` - - :part:`16%` - * - llvm/utils/TableGen/GlobalISel - - `17` - - `10` - - `7` - - :part:`58%` - * - llvm/utils/unittest/googlemock/include/gmock - - `12` - - `0` - - `12` - - :none:`0%` - * - llvm/utils/unittest/googlemock/include/gmock/internal - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/utils/unittest/googlemock/include/gmock/internal/custom - - `3` - - `0` - - `3` - - :none:`0%` - * - llvm/utils/unittest/googletest/include/gtest - - `11` - - `0` - - `11` - - :none:`0%` - * - llvm/utils/unittest/googletest/include/gtest/internal - - `8` - - `0` - - `8` - - :none:`0%` - * - llvm/utils/unittest/googletest/include/gtest/internal/custom - - `4` - - `0` - - `4` - - :none:`0%` - * - llvm/utils/unittest/googletest/src - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/unittest/UnitTestMain - - `1` - - `0` - - `1` - - :none:`0%` - * - llvm/utils/yaml-bench - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/standalone/include/Standalone - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/examples/standalone/include/Standalone-c - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/standalone/lib/CAPI - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/standalone/lib/Standalone - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/examples/standalone/python - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/standalone/standalone-opt - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/standalone/standalone-translate - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch1 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch1/include/toy - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch1/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch2 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch2/include/toy - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch2/mlir - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch2/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch3 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch3/include/toy - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch3/mlir - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch3/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch4 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch4/include/toy - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch4/mlir - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch4/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch5 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch5/include/toy - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch5/mlir - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch5/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch6 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch6/include/toy - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch6/mlir - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch6/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/examples/toy/Ch7 - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch7/include/toy - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch7/mlir - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/examples/toy/Ch7/parser - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/include/mlir - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Analysis - - `7` - - `5` - - `2` - - :part:`71%` - * - mlir/include/mlir/Analysis/AliasAnalysis - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Analysis/Presburger - - `9` - - `9` - - `0` - - :good:`100%` - * - mlir/include/mlir/Bindings/Python - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/include/mlir/CAPI - - `12` - - `12` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/AffineToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ArithmeticToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ArithmeticToSPIRV - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ArmNeon2dToIntr - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/AsyncToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/BufferizationToMemRef - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ComplexToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ComplexToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ControlFlowToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ControlFlowToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/FuncToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/GPUCommon - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/GPUToNVVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/GPUToROCDL - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/GPUToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/GPUToVulkan - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/include/mlir/Conversion/LinalgToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/LinalgToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/LLVMCommon - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/MathToLibm - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/MathToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/MathToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/MemRefToLLVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/MemRefToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/OpenACCToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/OpenACCToSCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/OpenMPToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/PDLToPDLInterp - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ReconcileUnrealizedCasts - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/SCFToControlFlow - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/SCFToGPU - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/SCFToOpenMP - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/SCFToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/ShapeToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/SPIRVToLLVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/StandardToLLVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/TensorToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/TosaToLinalg - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/TosaToSCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/TosaToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/VectorToGPU - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/VectorToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/VectorToSCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Conversion/VectorToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Affine - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Affine/Analysis - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Affine/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/AMX - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Arithmetic/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Arithmetic/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Arithmetic/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/ArmNeon - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/ArmSVE - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Async - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Async/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Bufferization/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Bufferization/Transforms - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Complex/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/ControlFlow/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/DLTI - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/EmitC/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Func/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Func/Transforms - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/GPU - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg/Analysis - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg/Transforms - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Linalg/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/LLVMIR - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/LLVMIR/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Math/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Math/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/MemRef/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/MemRef/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/MemRef/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/OpenACC - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/OpenMP - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/PDL/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/PDLInterp/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Quant - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SCF - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SCF/Utils - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Shape/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Shape/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SparseTensor/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SparseTensor/Pipelines - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SparseTensor/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SparseTensor/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SPIRV/IR - - `9` - - `9` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SPIRV/Linking - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SPIRV/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/SPIRV/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tensor/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tensor/Transforms - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tensor/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tosa/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tosa/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Tosa/Utils - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Utils - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Vector/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Vector/Transforms - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/Vector/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/X86Vector - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/ExecutionEngine - - `8` - - `7` - - `1` - - :part:`87%` - * - mlir/include/mlir/Interfaces - - `14` - - `13` - - `1` - - :part:`92%` - * - mlir/include/mlir/IR - - `49` - - `29` - - `20` - - :part:`59%` - * - mlir/include/mlir/Parser - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Pass - - `6` - - `0` - - `6` - - :none:`0%` - * - mlir/include/mlir/Reducer - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/include/mlir/Rewrite - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Support - - `15` - - `9` - - `6` - - :part:`60%` - * - mlir/include/mlir/TableGen - - `21` - - `19` - - `2` - - :part:`90%` - * - mlir/include/mlir/Target/Cpp - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/AMX - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/ArmNeon - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/ArmSVE - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/NVVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/OpenACC - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/ROCDL - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/LLVMIR/Dialect/X86Vector - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Target/SPIRV - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/include/mlir/Tools/mlir-lsp-server - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Tools/mlir-reduce - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Tools/PDLL/AST - - `4` - - `2` - - `2` - - :part:`50%` - * - mlir/include/mlir/Tools/PDLL/CodeGen - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/include/mlir/Tools/PDLL/ODS - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Tools/PDLL/Parser - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Transforms - - `9` - - `7` - - `2` - - :part:`77%` - * - mlir/include/mlir-c - - `15` - - `15` - - `0` - - :good:`100%` - * - mlir/include/mlir-c/Bindings/Python - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir-c/Dialect - - `11` - - `11` - - `0` - - :good:`100%` - * - mlir/lib/Analysis - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/lib/Analysis/AliasAnalysis - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Analysis/Presburger - - `8` - - `8` - - `0` - - :good:`100%` - * - mlir/lib/Bindings/Python - - `23` - - `23` - - `0` - - :good:`100%` - * - mlir/lib/Bindings/Python/Conversions - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Bindings/Python/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Conversion - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Debug - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Dialect - - `15` - - `15` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/ExecutionEngine - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Interfaces - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/IR - - `10` - - `10` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Registration - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/CAPI/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/AffineToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ArithmeticToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ArithmeticToSPIRV - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ArmNeon2dToIntr - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/AsyncToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/BufferizationToMemRef - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/lib/Conversion/ComplexToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ComplexToStandard - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ControlFlowToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ControlFlowToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/FuncToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/GPUCommon - - `5` - - `4` - - `1` - - :part:`80%` - * - mlir/lib/Conversion/GPUToNVVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/GPUToROCDL - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/GPUToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/GPUToVulkan - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/LinalgToSPIRV - - `2` - - `1` - - `1` - - :part:`50%` - * - mlir/lib/Conversion/LinalgToStandard - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/lib/Conversion/LLVMCommon - - `8` - - `8` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/MathToLibm - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/MathToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/MathToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/MemRefToLLVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/MemRefToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/OpenACCToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/OpenACCToSCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/OpenMPToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/PDLToPDLInterp - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ReconcileUnrealizedCasts - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SCFToControlFlow - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SCFToGPU - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SCFToOpenMP - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SCFToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/ShapeToStandard - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SPIRVCommon - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/SPIRVToLLVM - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/StandardToLLVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/TensorToSPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/TosaToLinalg - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/TosaToSCF - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/TosaToStandard - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/VectorToGPU - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/lib/Conversion/VectorToLLVM - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/VectorToSCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Conversion/VectorToSPIRV - - `2` - - `1` - - `1` - - :part:`50%` - * - mlir/lib/Dialect - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Affine/Analysis - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Affine/IR - - `3` - - `2` - - `1` - - :part:`66%` - * - mlir/lib/Dialect/Affine/Transforms - - `14` - - `14` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Affine/Utils - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/AMX/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/AMX/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Arithmetic/IR - - `2` - - `1` - - `1` - - :part:`50%` - * - mlir/lib/Dialect/Arithmetic/Transforms - - `4` - - `3` - - `1` - - :part:`75%` - * - mlir/lib/Dialect/Arithmetic/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/ArmNeon/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/ArmSVE/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/ArmSVE/Transforms - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Async/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Async/Transforms - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Bufferization/IR - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Bufferization/Transforms - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Complex/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/ControlFlow/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/DLTI - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/EmitC/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Func/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Func/Transforms - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/GPU/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/GPU/Transforms - - `9` - - `7` - - `2` - - :part:`77%` - * - mlir/lib/Dialect/Linalg/Analysis - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Linalg/ComprehensiveBufferize - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Linalg/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Linalg/Transforms - - `25` - - `25` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Linalg/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/LLVMIR/IR - - `7` - - `5` - - `2` - - :part:`71%` - * - mlir/lib/Dialect/LLVMIR/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Math/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Math/Transforms - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/MemRef/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/MemRef/Transforms - - `7` - - `6` - - `1` - - :part:`85%` - * - mlir/lib/Dialect/MemRef/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/OpenACC/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/OpenMP/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/PDL/IR - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/PDLInterp/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Quant/IR - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Quant/Transforms - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Quant/Utils - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SCF - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SCF/Transforms - - `12` - - `11` - - `1` - - :part:`91%` - * - mlir/lib/Dialect/SCF/Utils - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Shape/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Shape/Transforms - - `5` - - `5` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SparseTensor/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SparseTensor/Pipelines - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SparseTensor/Transforms - - `5` - - `4` - - `1` - - :part:`80%` - * - mlir/lib/Dialect/SparseTensor/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SPIRV/IR - - `8` - - `6` - - `2` - - :part:`75%` - * - mlir/lib/Dialect/SPIRV/Linking/ModuleCombiner - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/SPIRV/Transforms - - `7` - - `6` - - `1` - - :part:`85%` - * - mlir/lib/Dialect/SPIRV/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tensor/IR - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tensor/Transforms - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tensor/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tosa/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tosa/Transforms - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tosa/Utils - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Utils - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Vector/IR - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/lib/Dialect/Vector/Transforms - - `11` - - `11` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Vector/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/X86Vector/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/X86Vector/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/ExecutionEngine - - `9` - - `9` - - `0` - - :good:`100%` - * - mlir/lib/Interfaces - - `12` - - `12` - - `0` - - :good:`100%` - * - mlir/lib/IR - - `38` - - `31` - - `7` - - :part:`81%` - * - mlir/lib/Parser - - `14` - - `10` - - `4` - - :part:`71%` - * - mlir/lib/Pass - - `8` - - `6` - - `2` - - :part:`75%` - * - mlir/lib/Reducer - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/lib/Rewrite - - `4` - - `3` - - `1` - - :part:`75%` - * - mlir/lib/Support - - `8` - - `8` - - `0` - - :good:`100%` - * - mlir/lib/TableGen - - `18` - - `18` - - `0` - - :good:`100%` - * - mlir/lib/Target/Cpp - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR - - `7` - - `6` - - `1` - - :part:`85%` - * - mlir/lib/Target/LLVMIR/Dialect/AMX - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/ArmNeon - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/ArmSVE - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/LLVMIR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/NVVM - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/OpenACC - - `1` - - `0` - - `1` - - :none:`0%` - * - mlir/lib/Target/LLVMIR/Dialect/OpenMP - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/ROCDL - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/LLVMIR/Dialect/X86Vector - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Target/SPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/lib/Target/SPIRV/Deserialization - - `4` - - `3` - - `1` - - :part:`75%` - * - mlir/lib/Target/SPIRV/Serialization - - `4` - - `3` - - `1` - - :part:`75%` - * - mlir/lib/Tools/mlir-lsp-server - - `5` - - `4` - - `1` - - :part:`80%` - * - mlir/lib/Tools/mlir-lsp-server/lsp - - `6` - - `4` - - `2` - - :part:`66%` - * - mlir/lib/Tools/mlir-reduce - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/lib/Tools/PDLL/AST - - `6` - - `5` - - `1` - - :part:`83%` - * - mlir/lib/Tools/PDLL/CodeGen - - `2` - - `1` - - `1` - - :part:`50%` - * - mlir/lib/Tools/PDLL/ODS - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Tools/PDLL/Parser - - `3` - - `1` - - `2` - - :part:`33%` - * - mlir/lib/Transforms - - `13` - - `11` - - `2` - - :part:`84%` - * - mlir/lib/Transforms/Utils - - `6` - - `6` - - `0` - - :good:`100%` - * - mlir/lib/Translation - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-cpu-runner - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-linalg-ods-gen - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-lsp-server - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-opt - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-pdll - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-reduce - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-shlib - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-spirv-cpu-runner - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-tblgen - - `29` - - `28` - - `1` - - :part:`96%` - * - mlir/tools/mlir-translate - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/tools/mlir-vulkan-runner - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/unittests/Analysis/Presburger - - `8` - - `8` - - `0` - - :good:`100%` - * - mlir/unittests/Conversion/PDLToPDLInterp - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect/Affine/Analysis - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect/Quant - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect/SparseTensor - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect/SPIRV - - `2` - - `2` - - `0` - - :good:`100%` - * - mlir/unittests/Dialect/Utils - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/ExecutionEngine - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Interfaces - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/unittests/IR - - `7` - - `7` - - `0` - - :good:`100%` - * - mlir/unittests/Pass - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/unittests/Rewrite - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/unittests/Support - - `5` - - `4` - - `1` - - :part:`80%` - * - mlir/unittests/TableGen - - `5` - - `3` - - `2` - - :part:`60%` - * - mlir/unittests/Transforms - - `2` - - `2` - - `0` - - :good:`100%` - * - openmp/libompd/src - - `9` - - `9` - - `0` - - :good:`100%` - * - openmp/libomptarget/DeviceRTL/include - - `8` - - `8` - - `0` - - :good:`100%` - * - openmp/libomptarget/DeviceRTL/src - - `12` - - `9` - - `3` - - :part:`75%` - * - openmp/libomptarget/include - - `9` - - `8` - - `1` - - :part:`88%` - * - openmp/libomptarget/plugins/amdgpu/dynamic_hsa - - `3` - - `2` - - `1` - - :part:`66%` - * - openmp/libomptarget/plugins/amdgpu/impl - - `13` - - `10` - - `3` - - :part:`76%` - * - openmp/libomptarget/plugins/amdgpu/src - - `2` - - `1` - - `1` - - :part:`50%` - * - openmp/libomptarget/plugins/common/elf_common - - `2` - - `2` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/common/MemoryManager - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/cuda/dynamic_cuda - - `2` - - `2` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/cuda/src - - `1` - - `0` - - `1` - - :none:`0%` - * - openmp/libomptarget/plugins/generic-elf-64bit/src - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/remote/include - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/remote/lib - - `1` - - `0` - - `1` - - :none:`0%` - * - openmp/libomptarget/plugins/remote/server - - `3` - - `3` - - `0` - - :good:`100%` - * - openmp/libomptarget/plugins/remote/src - - `3` - - `2` - - `1` - - :part:`66%` - * - openmp/libomptarget/plugins/ve/src - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/src - - `7` - - `6` - - `1` - - :part:`85%` - * - openmp/libomptarget/tools/deviceinfo - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/runtime/doc/doxygen - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/runtime/src - - `75` - - `65` - - `10` - - :part:`86%` - * - openmp/runtime/src/thirdparty/ittnotify - - `6` - - `5` - - `1` - - :part:`83%` - * - openmp/runtime/src/thirdparty/ittnotify/legacy - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/tools/archer - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/tools/archer/tests/ompt - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/tools/multiplex - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/tools/multiplex/tests - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/tools/multiplex/tests/custom_data_storage - - `2` - - `1` - - `1` - - :part:`50%` - * - openmp/tools/multiplex/tests/print - - `2` - - `2` - - `0` - - :good:`100%` - * - polly/include/polly - - `25` - - `25` - - `0` - - :good:`100%` - * - polly/include/polly/CodeGen - - `14` - - `14` - - `0` - - :good:`100%` - * - polly/include/polly/Support - - `12` - - `12` - - `0` - - :good:`100%` - * - polly/lib/Analysis - - `9` - - `9` - - `0` - - :good:`100%` - * - polly/lib/CodeGen - - `15` - - `15` - - `0` - - :good:`100%` - * - polly/lib/Exchange - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/lib/External/isl - - `68` - - `1` - - `67` - - :part:`1%` - * - polly/lib/External/isl/imath - - `6` - - `1` - - `5` - - :part:`16%` - * - polly/lib/External/isl/imath_wrap - - `4` - - `0` - - `4` - - :none:`0%` - * - polly/lib/External/isl/include/isl - - `59` - - `9` - - `50` - - :part:`15%` - * - polly/lib/External/isl/interface - - `8` - - `1` - - `7` - - :part:`12%` - * - polly/lib/External/pet/include - - `1` - - `0` - - `1` - - :none:`0%` - * - polly/lib/External/ppcg - - `17` - - `0` - - `17` - - :none:`0%` - * - polly/lib/Plugin - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/lib/Support - - `11` - - `11` - - `0` - - :good:`100%` - * - polly/lib/Transform - - `15` - - `15` - - `0` - - :good:`100%` - * - polly/tools/GPURuntime - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/DeLICM - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/Flatten - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/Isl - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/ScheduleOptimizer - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/ScopPassManager - - `1` - - `1` - - `0` - - :good:`100%` - * - polly/unittests/Support - - `1` - - `1` - - `0` - - :good:`100%` - * - pstl/include/pstl/internal - - `23` - - `16` - - `7` - - :part:`69%` - * - pstl/include/pstl/internal/omp - - `11` - - `8` - - `3` - - :part:`72%` - * - third-party/benchmark/cmake - - `5` - - `1` - - `4` - - :part:`20%` - * - third-party/benchmark/include/benchmark - - `1` - - `0` - - `1` - - :none:`0%` - * - third-party/benchmark/src - - `21` - - `21` - - `0` - - :good:`100%` - * - utils/bazel/llvm-project-overlay/clang/include/clang/Config - - `1` - - `1` - - `0` - - :good:`100%` - * - utils/bazel/llvm-project-overlay/llvm/include/llvm/Config - - `2` - - `1` - - `1` - - :part:`50%` - * - Total - - :total:`16432` - - :total:`8857` - - :total:`7575` - - :total:`53%` diff --git a/clang/docs/RealtimeSanitizer.rst b/clang/docs/RealtimeSanitizer.rst index 41b8bbb33baf14..e5f5abfcd9b47e 100644 --- a/clang/docs/RealtimeSanitizer.rst +++ b/clang/docs/RealtimeSanitizer.rst @@ -11,11 +11,16 @@ RealtimeSanitizer (a.k.a. RTSan) is a real-time safety testing tool for C and C+ projects. RTSan can be used to detect real-time violations, i.e. calls to methods that are not safe for use in functions with deterministic run time requirements. RTSan considers any function marked with the ``[[clang::nonblocking]]`` attribute -to be a real-time function. If RTSan detects a call to ``malloc``, ``free``, -``pthread_mutex_lock``, or anything else that could have a non-deterministic -execution time in a function marked ``[[clang::nonblocking]]`` +to be a real-time function. At run-time, if RTSan detects a call to ``malloc``, +``free``, ``pthread_mutex_lock``, or anything else that could have a +non-deterministic execution time in a function marked ``[[clang::nonblocking]]`` RTSan raises an error. +RTSan performs its analysis at run-time but shares the ``[[clang::nonblocking]]`` +attribute with the :doc:`FunctionEffectAnalysis` system, which operates at +compile-time to detect potential real-time safety violations. For comprehensive +detection of real-time safety issues, it is recommended to use both systems together. + The runtime slowdown introduced by RealtimeSanitizer is negligible. How to build diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a39ffc8366dda4..402203f89e23a0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -140,7 +140,7 @@ C++ Specific Potentially Breaking Changes unsigned operator""_udl_name(unsigned long long); - Clang will now produce an error diagnostic when [[clang::lifetimebound]] is - applied on a parameter of a function that returns void. This was previously + applied on a parameter of a function that returns void. This was previously ignored and had no effect. (#GH107556) .. code-block:: c++ @@ -469,7 +469,8 @@ Bug Fixes in This Version - Fixed a crash using ``__array_rank`` on 64-bit targets. (#GH113044). - The warning emitted for an unsupported register variable type now points to the unsupported type instead of the ``register`` keyword (#GH109776). -- Fixed a crash when emit ctor for global variant with flexible array init (#GH113187). +- Fixed a crash when emit ctor for global variant with flexible array init (#GH113187). +- Fixed a crash when GNU statement expression contains invalid statement (#GH113468). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -573,6 +574,8 @@ Bug Fixes to C++ Support (#GH95854). - Fixed an assertion failure when evaluating an invalid expression in an array initializer. (#GH112140) - Fixed an assertion failure in range calculations for conditional throw expressions. (#GH111854) +- Clang now correctly ignores previous partial specializations of member templates explicitly specialized for + an implicitly instantiated class template specialization. (#GH51051) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -742,6 +745,8 @@ AST Matchers - Fixed a crash when traverse lambda expr with invalid captures. (#GH106444) +- Fixed ``isInstantiated`` and ``isInTemplateInstantiation`` to also match for variable templates. (#GH110666) + - Ensure ``hasName`` matches template specializations across inline namespaces, making `matchesNodeFullSlow` and `matchesNodeFullFast` consistent. diff --git a/clang/docs/index.rst b/clang/docs/index.rst index 1096432813fac5..66a4540a0bcacf 100644 --- a/clang/docs/index.rst +++ b/clang/docs/index.rst @@ -93,7 +93,6 @@ Using Clang Tools ClangCheck ClangFormat ClangFormatStyleOptions - ClangFormattedStatus ClangLinkerWrapper ClangNVLinkWrapper ClangOffloadBundler diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt deleted file mode 100644 index 67ff085144f4de..00000000000000 --- a/clang/docs/tools/clang-formatted-files.txt +++ /dev/null @@ -1,8827 +0,0 @@ -bolt/include/bolt/Core/BinaryData.h -bolt/include/bolt/Core/BinaryEmitter.h -bolt/include/bolt/Core/BinaryLoop.h -bolt/include/bolt/Core/BinarySection.h -bolt/include/bolt/Core/DebugData.h -bolt/include/bolt/Core/Exceptions.h -bolt/include/bolt/Core/JumpTable.h -bolt/include/bolt/Core/MCPlus.h -bolt/include/bolt/Core/MCPlusBuilder.h -bolt/include/bolt/Core/ParallelUtilities.h -bolt/include/bolt/Passes/ADRRelaxationPass.h -bolt/include/bolt/Passes/Aligner.h -bolt/include/bolt/Passes/AllocCombiner.h -bolt/include/bolt/Passes/AsmDump.h -bolt/include/bolt/Passes/BinaryFunctionCallGraph.h -bolt/include/bolt/Passes/BinaryPasses.h -bolt/include/bolt/Passes/CacheMetrics.h -bolt/include/bolt/Passes/CallGraph.h -bolt/include/bolt/Passes/CallGraphWalker.h -bolt/include/bolt/Passes/DataflowAnalysis.h -bolt/include/bolt/Passes/DataflowInfoManager.h -bolt/include/bolt/Passes/DominatorAnalysis.h -bolt/include/bolt/Passes/FrameAnalysis.h -bolt/include/bolt/Passes/FrameOptimizer.h -bolt/include/bolt/Passes/HFSort.h -bolt/include/bolt/Passes/IdenticalCodeFolding.h -bolt/include/bolt/Passes/IndirectCallPromotion.h -bolt/include/bolt/Passes/Inliner.h -bolt/include/bolt/Passes/Instrumentation.h -bolt/include/bolt/Passes/InstrumentationSummary.h -bolt/include/bolt/Passes/JTFootprintReduction.h -bolt/include/bolt/Passes/LivenessAnalysis.h -bolt/include/bolt/Passes/LongJmp.h -bolt/include/bolt/Passes/LoopInversionPass.h -bolt/include/bolt/Passes/MCF.h -bolt/include/bolt/Passes/PatchEntries.h -bolt/include/bolt/Passes/PLTCall.h -bolt/include/bolt/Passes/ReachingDefOrUse.h -bolt/include/bolt/Passes/ReachingInsns.h -bolt/include/bolt/Passes/RegAnalysis.h -bolt/include/bolt/Passes/RegReAssign.h -bolt/include/bolt/Passes/ReorderAlgorithm.h -bolt/include/bolt/Passes/ReorderData.h -bolt/include/bolt/Passes/ReorderFunctions.h -bolt/include/bolt/Passes/ReorderUtils.h -bolt/include/bolt/Passes/RetpolineInsertion.h -bolt/include/bolt/Passes/ShrinkWrapping.h -bolt/include/bolt/Passes/SplitFunctions.h -bolt/include/bolt/Passes/StackAllocationAnalysis.h -bolt/include/bolt/Passes/StackAvailableExpressions.h -bolt/include/bolt/Passes/StackPointerTracking.h -bolt/include/bolt/Passes/StackReachingUses.h -bolt/include/bolt/Passes/StokeInfo.h -bolt/include/bolt/Passes/TailDuplication.h -bolt/include/bolt/Passes/ThreeWayBranch.h -bolt/include/bolt/Passes/ValidateInternalCalls.h -bolt/include/bolt/Passes/VeneerElimination.h -bolt/include/bolt/Profile/BoltAddressTranslation.h -bolt/include/bolt/Profile/DataAggregator.h -bolt/include/bolt/Profile/DataReader.h -bolt/include/bolt/Profile/Heatmap.h -bolt/include/bolt/Profile/ProfileReaderBase.h -bolt/include/bolt/Profile/ProfileYAMLMapping.h -bolt/include/bolt/Profile/YAMLProfileReader.h -bolt/include/bolt/Profile/YAMLProfileWriter.h -bolt/include/bolt/Rewrite/BinaryPassManager.h -bolt/include/bolt/Rewrite/DWARFRewriter.h -bolt/include/bolt/Rewrite/ExecutableFileMemoryManager.h -bolt/include/bolt/Rewrite/MachORewriteInstance.h -bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h -bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h -bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h -bolt/include/bolt/Utils/CommandLineOpts.h -bolt/include/bolt/Utils/NameResolver.h -bolt/include/bolt/Utils/NameShortener.h -bolt/include/bolt/Utils/Utils.h -bolt/lib/Core/BinaryBasicBlock.cpp -bolt/lib/Core/BinarySection.cpp -bolt/lib/Core/DebugData.cpp -bolt/lib/Core/JumpTable.cpp -bolt/lib/Core/MCPlusBuilder.cpp -bolt/lib/Passes/ADRRelaxationPass.cpp -bolt/lib/Passes/AllocCombiner.cpp -bolt/lib/Passes/AsmDump.cpp -bolt/lib/Passes/BinaryFunctionCallGraph.cpp -bolt/lib/Passes/CacheMetrics.cpp -bolt/lib/Passes/CallGraphWalker.cpp -bolt/lib/Passes/DataflowAnalysis.cpp -bolt/lib/Passes/DataflowInfoManager.cpp -bolt/lib/Passes/HFSort.cpp -bolt/lib/Passes/IndirectCallPromotion.cpp -bolt/lib/Passes/Instrumentation.cpp -bolt/lib/Passes/JTFootprintReduction.cpp -bolt/lib/Passes/LivenessAnalysis.cpp -bolt/lib/Passes/LoopInversionPass.cpp -bolt/lib/Passes/PettisAndHansen.cpp -bolt/lib/Passes/StackAllocationAnalysis.cpp -bolt/lib/Passes/StackPointerTracking.cpp -bolt/lib/Passes/StackReachingUses.cpp -bolt/lib/Passes/TailDuplication.cpp -bolt/lib/Passes/ThreeWayBranch.cpp -bolt/lib/Passes/ValidateInternalCalls.cpp -bolt/lib/Profile/BoltAddressTranslation.cpp -bolt/lib/Profile/Heatmap.cpp -bolt/lib/Profile/ProfileReaderBase.cpp -bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp -bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp -bolt/lib/RuntimeLibs/RuntimeLibrary.cpp -bolt/lib/Utils/Utils.cpp -bolt/tools/heatmap/heatmap.cpp -bolt/tools/llvm-bolt-fuzzer/llvm-bolt-fuzzer.cpp -bolt/unittests/Core/MCPlusBuilder.cpp -clang/bindings/python/tests/cindex/INPUTS/header1.h -clang/bindings/python/tests/cindex/INPUTS/header2.h -clang/bindings/python/tests/cindex/INPUTS/header3.h -clang/examples/Attribute/Attribute.cpp -clang/examples/CallSuperAttribute/CallSuperAttrInfo.cpp -clang/examples/PluginsOrder/PluginsOrder.cpp -clang/include/clang/Analysis/BodyFarm.h -clang/include/clang/Analysis/IssueHash.h -clang/include/clang/Analysis/MacroExpansionContext.h -clang/include/clang/Analysis/Analyses/CalledOnceCheck.h -clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h -clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h -clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h -clang/include/clang/Analysis/FlowSensitive/ASTOps.h -clang/include/clang/Analysis/FlowSensitive/CNFFormula.h -clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h -clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h -clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h -clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h -clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h -clang/include/clang/Analysis/FlowSensitive/DebugSupport.h -clang/include/clang/Analysis/FlowSensitive/MapLattice.h -clang/include/clang/Analysis/FlowSensitive/MatchSwitch.h -clang/include/clang/Analysis/FlowSensitive/NoopAnalysis.h -clang/include/clang/Analysis/FlowSensitive/NoopLattice.h -clang/include/clang/Analysis/FlowSensitive/Solver.h -clang/include/clang/Analysis/FlowSensitive/StorageLocation.h -clang/include/clang/Analysis/FlowSensitive/Transfer.h -clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h -clang/include/clang/Analysis/FlowSensitive/Value.h -clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h -clang/include/clang/APINotes/APINotesYAMLCompiler.h -clang/include/clang/APINotes/Types.h -clang/include/clang/AST/AST.h -clang/include/clang/AST/ASTContextAllocate.h -clang/include/clang/AST/ASTDumper.h -clang/include/clang/AST/ASTFwd.h -clang/include/clang/AST/ASTImporterLookupTable.h -clang/include/clang/AST/ASTImporterSharedState.h -clang/include/clang/AST/AttrVisitor.h -clang/include/clang/AST/Availability.h -clang/include/clang/AST/ComputeDependence.h -clang/include/clang/AST/CurrentSourceLocExprScope.h -clang/include/clang/AST/DataCollection.h -clang/include/clang/AST/ExprOpenMP.h -clang/include/clang/AST/LexicallyOrderedRecursiveASTVisitor.h -clang/include/clang/AST/LocInfoType.h -clang/include/clang/AST/MangleNumberingContext.h -clang/include/clang/AST/OptionalDiagnostic.h -clang/include/clang/AST/OSLog.h -clang/include/clang/AST/QualTypeNames.h -clang/include/clang/AST/RecordLayout.h -clang/include/clang/AST/TemplateArgumentVisitor.h -clang/include/clang/ASTMatchers/ASTMatchersMacros.h -clang/include/clang/ASTMatchers/Dynamic/Registry.h -clang/include/clang/Basic/AddressSpaces.h -clang/include/clang/Basic/AlignedAllocation.h -clang/include/clang/Basic/AttributeCommonInfo.h -clang/include/clang/Basic/Attributes.h -clang/include/clang/Basic/AttrSubjectMatchRules.h -clang/include/clang/Basic/CLWarnings.h -clang/include/clang/Basic/CommentOptions.h -clang/include/clang/Basic/Cuda.h -clang/include/clang/Basic/DarwinSDKInfo.h -clang/include/clang/Basic/DiagnosticAnalysis.h -clang/include/clang/Basic/DiagnosticAST.h -clang/include/clang/Basic/DiagnosticComment.h -clang/include/clang/Basic/DiagnosticCrossTU.h -clang/include/clang/Basic/DiagnosticDriver.h -clang/include/clang/Basic/DiagnosticError.h -clang/include/clang/Basic/DiagnosticFrontend.h -clang/include/clang/Basic/DiagnosticLex.h -clang/include/clang/Basic/DiagnosticParse.h -clang/include/clang/Basic/DiagnosticRefactoring.h -clang/include/clang/Basic/DiagnosticSema.h -clang/include/clang/Basic/DiagnosticSerialization.h -clang/include/clang/Basic/ExpressionTraits.h -clang/include/clang/Basic/FileSystemOptions.h -clang/include/clang/Basic/NoSanitizeList.h -clang/include/clang/Basic/ProfileList.h -clang/include/clang/Basic/SanitizerSpecialCaseList.h -clang/include/clang/Basic/SyncScope.h -clang/include/clang/Basic/TargetID.h -clang/include/clang/Basic/Thunk.h -clang/include/clang/Basic/TypeTraits.h -clang/include/clang/Basic/XRayInstr.h -clang/include/clang/Basic/XRayLists.h -clang/include/clang/CrossTU/CrossTUDiagnostic.h -clang/include/clang/DirectoryWatcher/DirectoryWatcher.h -clang/include/clang/Driver/Distro.h -clang/include/clang/Driver/DriverDiagnostic.h -clang/include/clang/Driver/OptionUtils.h -clang/include/clang/Driver/XRayArgs.h -clang/include/clang/Edit/EditsReceiver.h -clang/include/clang/Format/Format.h -clang/include/clang/Frontend/FrontendDiagnostic.h -clang/include/clang/Frontend/FrontendPluginRegistry.h -clang/include/clang/Frontend/LogDiagnosticPrinter.h -clang/include/clang/Frontend/PCHContainerOperations.h -clang/include/clang/Frontend/PrecompiledPreamble.h -clang/include/clang/Frontend/TextDiagnosticBuffer.h -clang/include/clang/Frontend/TextDiagnosticPrinter.h -clang/include/clang/Index/DeclOccurrence.h -clang/include/clang/Index/IndexingOptions.h -clang/include/clang/IndexSerialization/SerializablePathCollection.h -clang/include/clang/Interpreter/Interpreter.h -clang/include/clang/Interpreter/PartialTranslationUnit.h -clang/include/clang/Lex/DependencyDirectivesSourceMinimizer.h -clang/include/clang/Lex/HeaderMap.h -clang/include/clang/Lex/HeaderMapTypes.h -clang/include/clang/Lex/HeaderSearchOptions.h -clang/include/clang/Lex/LexDiagnostic.h -clang/include/clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h -clang/include/clang/Parse/LoopHint.h -clang/include/clang/Parse/ParseDiagnostic.h -clang/include/clang/Sema/CleanupInfo.h -clang/include/clang/Sema/SemaDiagnostic.h -clang/include/clang/Sema/TemplateInstCallback.h -clang/include/clang/Serialization/ASTBitCodes.h -clang/include/clang/Serialization/InMemoryModuleCache.h -clang/include/clang/Serialization/SerializationDiagnostic.h -clang/include/clang/StaticAnalyzer/Core/CheckerRegistryData.h -clang/include/clang/StaticAnalyzer/Core/BugReporter/CommonBugCategories.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicCastInfo.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicType.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/DynamicTypeInfo.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/LoopWidening.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConstraintManager.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/SMTConv.h -clang/include/clang/StaticAnalyzer/Core/PathSensitive/StoreRef.h -clang/include/clang/StaticAnalyzer/Frontend/AnalyzerHelpFlags.h -clang/include/clang/StaticAnalyzer/Frontend/FrontendActions.h -clang/include/clang/Testing/CommandLineArgs.h -clang/include/clang/Testing/TestClangConfig.h -clang/include/clang/Tooling/AllTUsExecution.h -clang/include/clang/Tooling/ArgumentsAdjusters.h -clang/include/clang/Tooling/CompilationDatabasePluginRegistry.h -clang/include/clang/Tooling/DiagnosticsYaml.h -clang/include/clang/Tooling/Execution.h -clang/include/clang/Tooling/JSONCompilationDatabase.h -clang/include/clang/Tooling/Refactoring.h -clang/include/clang/Tooling/StandaloneExecution.h -clang/include/clang/Tooling/ToolExecutorPluginRegistry.h -clang/include/clang/Tooling/ASTDiff/ASTDiff.h -clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h -clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h -clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h -clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h -clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h -clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h -clang/include/clang/Tooling/Inclusions/HeaderIncludes.h -clang/include/clang/Tooling/Inclusions/IncludeStyle.h -clang/include/clang/Tooling/Inclusions/StandardLibrary.h -clang/include/clang/Tooling/Refactoring/ASTSelection.h -clang/include/clang/Tooling/Refactoring/AtomicChange.h -clang/include/clang/Tooling/Refactoring/Lookup.h -clang/include/clang/Tooling/Refactoring/RecursiveSymbolVisitor.h -clang/include/clang/Tooling/Refactoring/RefactoringAction.h -clang/include/clang/Tooling/Refactoring/RefactoringActionRule.h -clang/include/clang/Tooling/Refactoring/RefactoringActionRuleRequirements.h -clang/include/clang/Tooling/Refactoring/RefactoringDiagnostic.h -clang/include/clang/Tooling/Refactoring/RefactoringOption.h -clang/include/clang/Tooling/Refactoring/RefactoringOptions.h -clang/include/clang/Tooling/Refactoring/RefactoringOptionVisitor.h -clang/include/clang/Tooling/Refactoring/RefactoringRuleContext.h -clang/include/clang/Tooling/Refactoring/Extract/Extract.h -clang/include/clang/Tooling/Refactoring/Extract/SourceExtraction.h -clang/include/clang/Tooling/Refactoring/Rename/SymbolName.h -clang/include/clang/Tooling/Refactoring/Rename/SymbolOccurrences.h -clang/include/clang/Tooling/Refactoring/Rename/USRFinder.h -clang/include/clang/Tooling/Refactoring/Rename/USRFindingAction.h -clang/include/clang/Tooling/Refactoring/Rename/USRLocFinder.h -clang/include/clang/Tooling/Syntax/BuildTree.h -clang/include/clang/Tooling/Syntax/Mutations.h -clang/include/clang/Tooling/Syntax/Nodes.h -clang/include/clang/Tooling/Syntax/Tokens.h -clang/include/clang/Tooling/Syntax/Tree.h -clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h -clang/include/clang/Tooling/Syntax/Pseudo/LRGraph.h -clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h -clang/include/clang/Tooling/Syntax/Pseudo/Preprocess.h -clang/include/clang/Tooling/Syntax/Pseudo/Token.h -clang/include/clang/Tooling/Transformer/MatchConsumer.h -clang/include/clang/Tooling/Transformer/Parsing.h -clang/include/clang/Tooling/Transformer/RangeSelector.h -clang/include/clang/Tooling/Transformer/SourceCode.h -clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h -clang/include/clang/Tooling/Transformer/Transformer.h -clang/include/clang-c/ExternC.h -clang/include/clang-c/FatalErrorHandler.h -clang/include/clang-c/Index.h -clang/lib/Analysis/CalledOnceCheck.cpp -clang/lib/Analysis/CloneDetection.cpp -clang/lib/Analysis/CodeInjector.cpp -clang/lib/Analysis/FlowSensitive/AdornedCFG.cpp -clang/lib/Analysis/FlowSensitive/ASTOps.cpp -clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp -clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp -clang/lib/Analysis/FlowSensitive/DebugSupport.cpp -clang/lib/Analysis/FlowSensitive/Transfer.cpp -clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp -clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp -clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp -clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp -clang/lib/APINotes/APINotesFormat.h -clang/lib/APINotes/APINotesTypes.cpp -clang/lib/APINotes/APINotesYAMLCompiler.cpp -clang/lib/AST/DataCollection.cpp -clang/lib/AST/Linkage.h -clang/lib/AST/ByteCode/ByteCodeGenError.cpp -clang/lib/AST/ByteCode/ByteCodeGenError.h -clang/lib/AST/ByteCode/Context.cpp -clang/lib/AST/ByteCode/Context.h -clang/lib/AST/ByteCode/Descriptor.cpp -clang/lib/AST/ByteCode/Disasm.cpp -clang/lib/AST/ByteCode/EvalEmitter.h -clang/lib/AST/ByteCode/Frame.cpp -clang/lib/AST/ByteCode/Frame.h -clang/lib/AST/ByteCode/InterpState.h -clang/lib/AST/ByteCode/Opcode.h -clang/lib/AST/ByteCode/Pointer.cpp -clang/lib/AST/ByteCode/PrimType.cpp -clang/lib/AST/ByteCode/Record.h -clang/lib/AST/ByteCode/Source.cpp -clang/lib/AST/ByteCode/Source.h -clang/lib/AST/ByteCode/State.cpp -clang/lib/AST/ByteCode/State.h -clang/lib/ASTMatchers/GtestMatchers.cpp -clang/lib/ASTMatchers/Dynamic/Marshallers.cpp -clang/lib/Basic/Attributes.cpp -clang/lib/Basic/DarwinSDKInfo.cpp -clang/lib/Basic/DiagnosticOptions.cpp -clang/lib/Basic/ExpressionTraits.cpp -clang/lib/Basic/FileEntry.cpp -clang/lib/Basic/NoSanitizeList.cpp -clang/lib/Basic/OpenCLOptions.cpp -clang/lib/Basic/SanitizerSpecialCaseList.cpp -clang/lib/Basic/TargetID.cpp -clang/lib/Basic/Targets.h -clang/lib/Basic/TypeTraits.cpp -clang/lib/Basic/XRayInstr.cpp -clang/lib/Basic/XRayLists.cpp -clang/lib/Basic/Targets/ARC.cpp -clang/lib/Basic/Targets/ARC.h -clang/lib/Basic/Targets/AVR.cpp -clang/lib/Basic/Targets/BPF.cpp -clang/lib/Basic/Targets/BPF.h -clang/lib/Basic/Targets/Hexagon.h -clang/lib/Basic/Targets/Lanai.h -clang/lib/Basic/Targets/M68k.h -clang/lib/Basic/Targets/MSP430.h -clang/lib/Basic/Targets/NVPTX.cpp -clang/lib/Basic/Targets/OSTargets.cpp -clang/lib/Basic/Targets/PNaCl.cpp -clang/lib/Basic/Targets/PNaCl.h -clang/lib/Basic/Targets/RISCV.h -clang/lib/Basic/Targets/Sparc.h -clang/lib/Basic/Targets/SPIR.cpp -clang/lib/Basic/Targets/SystemZ.h -clang/lib/Basic/Targets/TCE.cpp -clang/lib/Basic/Targets/TCE.h -clang/lib/Basic/Targets/VE.cpp -clang/lib/Basic/Targets/VE.h -clang/lib/Basic/Targets/WebAssembly.cpp -clang/lib/Basic/Targets/WebAssembly.h -clang/lib/Basic/Targets/XCore.cpp -clang/lib/CodeGen/ABIInfoImpl.cpp -clang/lib/CodeGen/ABIInfoImpl.h -clang/lib/CodeGen/CGCUDARuntime.cpp -clang/lib/CodeGen/CGLoopInfo.cpp -clang/lib/CodeGen/CGLoopInfo.h -clang/lib/CodeGen/CGStmtOpenMP.cpp -clang/lib/CodeGen/MacroPPCallbacks.cpp -clang/lib/CodeGen/ObjectFilePCHContainerOperations.cpp -clang/lib/CodeGen/PatternInit.cpp -clang/lib/CodeGen/PatternInit.h -clang/lib/CodeGen/VarBypassDetector.cpp -clang/lib/DirectoryWatcher/DirectoryScanner.cpp -clang/lib/DirectoryWatcher/DirectoryScanner.h -clang/lib/Driver/Distro.cpp -clang/lib/Driver/XRayArgs.cpp -clang/lib/Driver/ToolChains/AIX.cpp -clang/lib/Driver/ToolChains/AIX.h -clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp -clang/lib/Driver/ToolChains/AMDGPUOpenMP.h -clang/lib/Driver/ToolChains/AVR.cpp -clang/lib/Driver/ToolChains/AVR.h -clang/lib/Driver/ToolChains/CommonArgs.h -clang/lib/Driver/ToolChains/CrossWindows.h -clang/lib/Driver/ToolChains/DragonFly.h -clang/lib/Driver/ToolChains/FreeBSD.cpp -clang/lib/Driver/ToolChains/FreeBSD.h -clang/lib/Driver/ToolChains/HIPAMD.h -clang/lib/Driver/ToolChains/HIPSPV.cpp -clang/lib/Driver/ToolChains/HIPSPV.h -clang/lib/Driver/ToolChains/HIPUtility.cpp -clang/lib/Driver/ToolChains/HIPUtility.h -clang/lib/Driver/ToolChains/Hurd.cpp -clang/lib/Driver/ToolChains/Hurd.h -clang/lib/Driver/ToolChains/InterfaceStubs.cpp -clang/lib/Driver/ToolChains/InterfaceStubs.h -clang/lib/Driver/ToolChains/MipsLinux.cpp -clang/lib/Driver/ToolChains/MSP430.h -clang/lib/Driver/ToolChains/PPCFreeBSD.cpp -clang/lib/Driver/ToolChains/PPCFreeBSD.h -clang/lib/Driver/ToolChains/PPCLinux.h -clang/lib/Driver/ToolChains/ROCm.h -clang/lib/Driver/ToolChains/Solaris.cpp -clang/lib/Driver/ToolChains/Solaris.h -clang/lib/Driver/ToolChains/SPIRV.cpp -clang/lib/Driver/ToolChains/SPIRV.h -clang/lib/Driver/ToolChains/TCE.h -clang/lib/Driver/ToolChains/VEToolchain.cpp -clang/lib/Driver/ToolChains/VEToolchain.h -clang/lib/Driver/ToolChains/WebAssembly.h -clang/lib/Driver/ToolChains/XCore.cpp -clang/lib/Driver/ToolChains/ZOS.cpp -clang/lib/Driver/ToolChains/ZOS.h -clang/lib/Driver/ToolChains/Arch/ARM.h -clang/lib/Driver/ToolChains/Arch/M68k.cpp -clang/lib/Driver/ToolChains/Arch/M68k.h -clang/lib/Driver/ToolChains/Arch/RISCV.h -clang/lib/Driver/ToolChains/Arch/VE.cpp -clang/lib/Driver/ToolChains/Arch/VE.h -clang/lib/Driver/ToolChains/Arch/X86.cpp -clang/lib/Format/AffectedRangeManager.cpp -clang/lib/Format/AffectedRangeManager.h -clang/lib/Format/BreakableToken.cpp -clang/lib/Format/BreakableToken.h -clang/lib/Format/ContinuationIndenter.cpp -clang/lib/Format/ContinuationIndenter.h -clang/lib/Format/DefinitionBlockSeparator.cpp -clang/lib/Format/DefinitionBlockSeparator.h -clang/lib/Format/Encoding.h -clang/lib/Format/Format.cpp -clang/lib/Format/FormatInternal.h -clang/lib/Format/FormatToken.cpp -clang/lib/Format/FormatToken.h -clang/lib/Format/FormatTokenLexer.cpp -clang/lib/Format/FormatTokenLexer.h -clang/lib/Format/MacroExpander.cpp -clang/lib/Format/Macros.h -clang/lib/Format/NamespaceEndCommentsFixer.cpp -clang/lib/Format/NamespaceEndCommentsFixer.h -clang/lib/Format/QualifierAlignmentFixer.cpp -clang/lib/Format/QualifierAlignmentFixer.h -clang/lib/Format/SortJavaScriptImports.cpp -clang/lib/Format/SortJavaScriptImports.h -clang/lib/Format/TokenAnalyzer.cpp -clang/lib/Format/TokenAnalyzer.h -clang/lib/Format/TokenAnnotator.cpp -clang/lib/Format/TokenAnnotator.h -clang/lib/Format/UnwrappedLineFormatter.cpp -clang/lib/Format/UnwrappedLineFormatter.h -clang/lib/Format/UnwrappedLineParser.cpp -clang/lib/Format/UnwrappedLineParser.h -clang/lib/Format/UsingDeclarationsSorter.cpp -clang/lib/Format/UsingDeclarationsSorter.h -clang/lib/Format/WhitespaceManager.cpp -clang/lib/Format/WhitespaceManager.h -clang/lib/Frontend/ExtractAPIConsumer.cpp -clang/lib/Frontend/FrontendOptions.cpp -clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp -clang/lib/Frontend/SerializedDiagnosticReader.cpp -clang/lib/Headers/amxintrin.h -clang/lib/Headers/arm_neon_sve_bridge.h -clang/lib/Headers/avx512fp16intrin.h -clang/lib/Headers/avx512vlfp16intrin.h -clang/lib/Headers/builtins.h -clang/lib/Headers/inttypes.h -clang/lib/Headers/nmmintrin.h -clang/lib/Headers/s390intrin.h -clang/lib/Headers/stdalign.h -clang/lib/Headers/wmmintrin.h -clang/lib/Headers/xtestintrin.h -clang/lib/Headers/__clang_cuda_texture_intrinsics.h -clang/lib/Headers/__clang_hip_libdevice_declares.h -clang/lib/Headers/__stddef_max_align_t.h -clang/lib/Headers/openmp_wrappers/complex.h -clang/lib/Headers/openmp_wrappers/complex_cmath.h -clang/lib/Headers/openmp_wrappers/math.h -clang/lib/Headers/openmp_wrappers/time.h -clang/lib/Headers/ppc_wrappers/mmintrin.h -clang/lib/Headers/ppc_wrappers/smmintrin.h -clang/lib/Index/FileIndexRecord.cpp -clang/lib/Index/FileIndexRecord.h -clang/lib/IndexSerialization/SerializablePathCollection.cpp -clang/lib/Interpreter/IncrementalExecutor.cpp -clang/lib/Interpreter/IncrementalExecutor.h -clang/lib/Interpreter/IncrementalParser.cpp -clang/lib/Interpreter/IncrementalParser.h -clang/lib/Interpreter/Interpreter.cpp -clang/lib/Lex/PreprocessorLexer.cpp -clang/lib/Parse/ParseOpenMP.cpp -clang/lib/Sema/CodeCompleteConsumer.cpp -clang/lib/Sema/CoroutineStmtBuilder.h -clang/lib/Sema/SemaSYCL.cpp -clang/lib/Sema/UsedDeclVisitor.h -clang/lib/Serialization/InMemoryModuleCache.cpp -clang/lib/Serialization/ModuleFileExtension.cpp -clang/lib/StaticAnalyzer/Checkers/AllocationState.h -clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp -clang/lib/StaticAnalyzer/Checkers/ErrnoModeling.cpp -clang/lib/StaticAnalyzer/Checkers/ErrnoModeling.h -clang/lib/StaticAnalyzer/Checkers/ErrnoTesterChecker.cpp -clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp -clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp -clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h -clang/lib/StaticAnalyzer/Checkers/Move.h -clang/lib/StaticAnalyzer/Checkers/ReturnValueChecker.cpp -clang/lib/StaticAnalyzer/Checkers/SmartPtr.h -clang/lib/StaticAnalyzer/Checkers/SmartPtrChecker.cpp -clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp -clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp -clang/lib/StaticAnalyzer/Checkers/StringChecker.cpp -clang/lib/StaticAnalyzer/Checkers/Taint.cpp -clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp -clang/lib/StaticAnalyzer/Checkers/Yaml.h -clang/lib/StaticAnalyzer/Checkers/cert/InvalidPtrChecker.cpp -clang/lib/StaticAnalyzer/Checkers/cert/PutenvWithAutoChecker.cpp -clang/lib/StaticAnalyzer/Checkers/UninitializedObject/UninitializedPointee.cpp -clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.cpp -clang/lib/StaticAnalyzer/Checkers/WebKit/ASTUtils.h -clang/lib/StaticAnalyzer/Checkers/WebKit/DiagOutputUtils.h -clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.cpp -clang/lib/StaticAnalyzer/Checkers/WebKit/PtrTypesSemantics.h -clang/lib/StaticAnalyzer/Checkers/WebKit/RefCntblBaseVirtualDtorChecker.cpp -clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLambdaCapturesChecker.cpp -clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp -clang/lib/StaticAnalyzer/Core/CallDescription.cpp -clang/lib/StaticAnalyzer/Core/CheckerHelpers.cpp -clang/lib/StaticAnalyzer/Core/CheckerRegistryData.cpp -clang/lib/StaticAnalyzer/Core/CommonBugCategories.cpp -clang/lib/StaticAnalyzer/Core/ConstraintManager.cpp -clang/lib/StaticAnalyzer/Core/DynamicExtent.cpp -clang/lib/StaticAnalyzer/Core/DynamicType.cpp -clang/lib/StaticAnalyzer/Core/FunctionSummary.cpp -clang/lib/StaticAnalyzer/Core/SimpleConstraintManager.cpp -clang/lib/StaticAnalyzer/Core/SMTConstraintManager.cpp -clang/lib/StaticAnalyzer/Frontend/CreateCheckerManager.cpp -clang/lib/StaticAnalyzer/Frontend/FrontendActions.cpp -clang/lib/StaticAnalyzer/Frontend/ModelConsumer.cpp -clang/lib/Testing/CommandLineArgs.cpp -clang/lib/Tooling/ArgumentsAdjusters.cpp -clang/lib/Tooling/Execution.cpp -clang/lib/Tooling/ExpandResponseFilesCompilationDatabase.cpp -clang/lib/Tooling/FixIt.cpp -clang/lib/Tooling/GuessTargetAndModeCompilationDatabase.cpp -clang/lib/Tooling/StandaloneExecution.cpp -clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp -clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp -clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp -clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp -clang/lib/Tooling/Inclusions/HeaderIncludes.cpp -clang/lib/Tooling/Inclusions/IncludeStyle.cpp -clang/lib/Tooling/Inclusions/StandardLibrary.cpp -clang/lib/Tooling/Refactoring/ASTSelection.cpp -clang/lib/Tooling/Refactoring/Lookup.cpp -clang/lib/Tooling/Refactoring/RefactoringActions.cpp -clang/lib/Tooling/Refactoring/Extract/Extract.cpp -clang/lib/Tooling/Refactoring/Rename/SymbolOccurrences.cpp -clang/lib/Tooling/Refactoring/Rename/USRFinder.cpp -clang/lib/Tooling/Syntax/BuildTree.cpp -clang/lib/Tooling/Syntax/ComputeReplacements.cpp -clang/lib/Tooling/Syntax/Mutations.cpp -clang/lib/Tooling/Syntax/Nodes.cpp -clang/lib/Tooling/Syntax/Synthesis.cpp -clang/lib/Tooling/Syntax/Tree.cpp -clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp -clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp -clang/lib/Tooling/Syntax/Pseudo/Lex.cpp -clang/lib/Tooling/Syntax/Pseudo/LRGraph.cpp -clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp -clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp -clang/lib/Tooling/Syntax/Pseudo/Preprocess.cpp -clang/lib/Tooling/Syntax/Pseudo/Token.cpp -clang/lib/Tooling/Transformer/Parsing.cpp -clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp -clang/lib/Tooling/Transformer/Stencil.cpp -clang/lib/Tooling/Transformer/Transformer.cpp -clang/tools/amdgpu-arch/AMDGPUArch.cpp -clang/tools/apinotes-test/APINotesTest.cpp -clang/tools/clang-format/ClangFormat.cpp -clang/tools/clang-fuzzer/ClangFuzzer.cpp -clang/tools/clang-fuzzer/DummyClangFuzzer.cpp -clang/tools/clang-fuzzer/ExampleClangLLVMProtoFuzzer.cpp -clang/tools/clang-fuzzer/ExampleClangLoopProtoFuzzer.cpp -clang/tools/clang-fuzzer/handle-llvm/handle_llvm.h -clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp -clang/tools/clang-linker-wrapper/OffloadWrapper.cpp -clang/tools/clang-refactor/ClangRefactor.cpp -clang/tools/clang-refactor/TestSupport.cpp -clang/tools/clang-refactor/TestSupport.h -clang/tools/clang-refactor/ToolRefactoringResultConsumer.h -clang/tools/clang-repl/ClangRepl.cpp -clang/tools/clang-scan-deps/ClangScanDeps.cpp -clang/tools/clang-shlib/clang-shlib.cpp -clang/tools/driver/cc1gen_reproducer_main.cpp -clang/tools/libclang/CIndex.cpp -clang/tools/libclang/CIndexUSRs.cpp -clang/tools/libclang/CursorVisitor.h -clang/tools/libclang/CXCursor.cpp -clang/tools/libclang/CXCursor.h -clang/tools/scan-build-py/tests/functional/src/include/clean-one.h -clang/unittests/Analysis/CFGBuildResult.h -clang/unittests/Analysis/MacroExpansionContextTest.cpp -clang/unittests/Analysis/FlowSensitive/ASTOpsTest.cpp -clang/unittests/Analysis/FlowSensitive/CNFFormula.cpp -clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp -clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp -clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp -clang/unittests/Analysis/FlowSensitive/MatchSwitchTest.cpp -clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp -clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp -clang/unittests/Analysis/FlowSensitive/SolverTest.h -clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp -clang/unittests/Analysis/FlowSensitive/TestingSupport.h -clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp -clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp -clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp -clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolverTest.cpp -clang/unittests/AST/ASTImporterFixtures.cpp -clang/unittests/AST/ASTImporterFixtures.h -clang/unittests/AST/ASTImporterObjCTest.cpp -clang/unittests/AST/ASTPrint.h -clang/unittests/AST/AttrTest.cpp -clang/unittests/AST/RecursiveASTVisitorTest.cpp -clang/unittests/AST/SizelessTypesTest.cpp -clang/unittests/AST/TypePrinterTest.cpp -clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp -clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp -clang/unittests/ASTMatchers/ASTMatchersTest.h -clang/unittests/Basic/DarwinSDKInfoTest.cpp -clang/unittests/Basic/FileEntryTest.cpp -clang/unittests/Basic/LineOffsetMappingTest.cpp -clang/unittests/Basic/SanitizersTest.cpp -clang/unittests/CodeGen/CheckTargetFeaturesTest.cpp -clang/unittests/CrossTU/CrossTranslationUnitTest.cpp -clang/unittests/Driver/SanitizerArgsTest.cpp -clang/unittests/Format/CleanupTest.cpp -clang/unittests/Format/DefinitionBlockSeparatorTest.cpp -clang/unittests/Format/FormatTest.cpp -clang/unittests/Format/FormatTestComments.cpp -clang/unittests/Format/FormatTestCSharp.cpp -clang/unittests/Format/FormatTestJava.cpp -clang/unittests/Format/FormatTestJS.cpp -clang/unittests/Format/FormatTestJson.cpp -clang/unittests/Format/FormatTestObjC.cpp -clang/unittests/Format/FormatTestProto.cpp -clang/unittests/Format/FormatTestRawStrings.cpp -clang/unittests/Format/FormatTestSelective.cpp -clang/unittests/Format/FormatTestTableGen.cpp -clang/unittests/Format/FormatTestTextProto.cpp -clang/unittests/Format/FormatTestUtils.h -clang/unittests/Format/MacroExpanderTest.cpp -clang/unittests/Format/NamespaceEndCommentsFixerTest.cpp -clang/unittests/Format/QualifierFixerTest.cpp -clang/unittests/Format/SortImportsTestJava.cpp -clang/unittests/Format/SortImportsTestJS.cpp -clang/unittests/Format/SortIncludesTest.cpp -clang/unittests/Format/TestLexer.h -clang/unittests/Format/TokenAnnotatorTest.cpp -clang/unittests/Format/UsingDeclarationsSorterTest.cpp -clang/unittests/Frontend/ASTUnitTest.cpp -clang/unittests/Frontend/CompilerInstanceTest.cpp -clang/unittests/Frontend/FixedPointString.cpp -clang/unittests/Frontend/OutputStreamTest.cpp -clang/unittests/Frontend/ParsedSourceLocationTest.cpp -clang/unittests/Frontend/TextDiagnosticTest.cpp -clang/unittests/Frontend/UtilsTest.cpp -clang/unittests/Index/IndexTests.cpp -clang/unittests/Interpreter/IncrementalProcessingTest.cpp -clang/unittests/Interpreter/InterpreterTest.cpp -clang/unittests/Lex/HeaderMapTest.cpp -clang/unittests/Lex/HeaderMapTestUtils.h -clang/unittests/Lex/HeaderSearchTest.cpp -clang/unittests/Lex/PPMemoryAllocationsTest.cpp -clang/unittests/libclang/CrashTests/LibclangCrashTest.cpp -clang/unittests/Rewrite/RewriterTest.cpp -clang/unittests/Sema/CodeCompleteTest.cpp -clang/unittests/Sema/GslOwnerPointerInference.cpp -clang/unittests/Serialization/InMemoryModuleCacheTest.cpp -clang/unittests/Serialization/ModuleCacheTest.cpp -clang/unittests/StaticAnalyzer/BugReportInterestingnessTest.cpp -clang/unittests/StaticAnalyzer/CallEventTest.cpp -clang/unittests/StaticAnalyzer/CheckerRegistration.h -clang/unittests/StaticAnalyzer/ConflictingEvalCallsTest.cpp -clang/unittests/StaticAnalyzer/StoreTest.cpp -clang/unittests/StaticAnalyzer/SValTest.cpp -clang/unittests/StaticAnalyzer/SymbolReaperTest.cpp -clang/unittests/Tooling/CastExprTest.cpp -clang/unittests/Tooling/DependencyScannerTest.cpp -clang/unittests/Tooling/ExecutionTest.cpp -clang/unittests/Tooling/LookupTest.cpp -clang/unittests/Tooling/RecursiveASTVisitorTestPostOrderVisitor.cpp -clang/unittests/Tooling/RefactoringActionRulesTest.cpp -clang/unittests/Tooling/ReplacementTest.h -clang/unittests/Tooling/SourceCodeBuildersTest.cpp -clang/unittests/Tooling/StandardLibraryTest.cpp -clang/unittests/Tooling/StencilTest.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/CallbacksCallExpr.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/CallbacksLeaf.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/Concept.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/CXXMethodDecl.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/ImplicitCtorInitializer.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/InitListExprPostOrder.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/InitListExprPreOrder.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/IntegerLiteral.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/MemberPointerTypeLoc.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/NestedNameSpecifiers.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/ParenExpr.cpp -clang/unittests/Tooling/RecursiveASTVisitorTests/TraversalScope.cpp -clang/unittests/Tooling/Syntax/TokensTest.cpp -clang/unittests/Tooling/Syntax/TreeTestBase.cpp -clang/unittests/Tooling/Syntax/TreeTestBase.h -clang/unittests/Tooling/Syntax/Pseudo/GrammarTest.cpp -clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp -clang/unittests/Tooling/Syntax/Pseudo/PreprocessTest.cpp -clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp -clang/utils/TableGen/ClangDataCollectorsEmitter.cpp -clang/utils/TableGen/ClangSyntaxEmitter.cpp -clang/utils/TableGen/TableGenBackends.h -clang-tools-extra/clang-apply-replacements/include/clang-apply-replacements/Tooling/ApplyReplacements.h -clang-tools-extra/clang-apply-replacements/lib/Tooling/ApplyReplacements.cpp -clang-tools-extra/clang-apply-replacements/tool/ClangApplyReplacementsMain.cpp -clang-tools-extra/clang-doc/BitcodeReader.cpp -clang-tools-extra/clang-doc/BitcodeReader.h -clang-tools-extra/clang-doc/BitcodeWriter.cpp -clang-tools-extra/clang-doc/BitcodeWriter.h -clang-tools-extra/clang-doc/ClangDoc.cpp -clang-tools-extra/clang-doc/ClangDoc.h -clang-tools-extra/clang-doc/Generators.cpp -clang-tools-extra/clang-doc/Generators.h -clang-tools-extra/clang-doc/Mapper.cpp -clang-tools-extra/clang-doc/Mapper.h -clang-tools-extra/clang-doc/MDGenerator.cpp -clang-tools-extra/clang-doc/Representation.cpp -clang-tools-extra/clang-doc/Representation.h -clang-tools-extra/clang-doc/Serialize.cpp -clang-tools-extra/clang-doc/Serialize.h -clang-tools-extra/clang-doc/YAMLGenerator.cpp -clang-tools-extra/clang-doc/tool/ClangDocMain.cpp -clang-tools-extra/clang-include-fixer/FuzzySymbolIndex.cpp -clang-tools-extra/clang-include-fixer/IncludeFixer.h -clang-tools-extra/clang-include-fixer/IncludeFixerContext.h -clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.cpp -clang-tools-extra/clang-include-fixer/InMemorySymbolIndex.h -clang-tools-extra/clang-include-fixer/SymbolIndex.h -clang-tools-extra/clang-include-fixer/YamlSymbolIndex.cpp -clang-tools-extra/clang-include-fixer/YamlSymbolIndex.h -clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.cpp -clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllMacros.h -clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbols.h -clang-tools-extra/clang-include-fixer/find-all-symbols/FindAllSymbolsAction.h -clang-tools-extra/clang-include-fixer/find-all-symbols/HeaderMapCollector.cpp -clang-tools-extra/clang-include-fixer/find-all-symbols/HeaderMapCollector.h -clang-tools-extra/clang-include-fixer/find-all-symbols/PathConfig.cpp -clang-tools-extra/clang-include-fixer/find-all-symbols/PathConfig.h -clang-tools-extra/clang-include-fixer/find-all-symbols/PragmaCommentHandler.cpp -clang-tools-extra/clang-include-fixer/find-all-symbols/PragmaCommentHandler.h -clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp -clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.h -clang-tools-extra/clang-include-fixer/find-all-symbols/SymbolReporter.h -clang-tools-extra/clang-include-fixer/plugin/IncludeFixerPlugin.cpp -clang-tools-extra/clang-move/HelperDeclRefGraph.h -clang-tools-extra/clang-move/tool/ClangMove.cpp -clang-tools-extra/clang-query/Query.cpp -clang-tools-extra/clang-query/Query.h -clang-tools-extra/clang-query/QueryParser.h -clang-tools-extra/clang-query/QuerySession.h -clang-tools-extra/clang-reorder-fields/ReorderFieldsAction.h -clang-tools-extra/clang-tidy/ClangTidy.h -clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp -clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h -clang-tools-extra/clang-tidy/ClangTidyForceLinker.h -clang-tools-extra/clang-tidy/ClangTidyModule.cpp -clang-tools-extra/clang-tidy/ClangTidyModule.h -clang-tools-extra/clang-tidy/ClangTidyModuleRegistry.h -clang-tools-extra/clang-tidy/ClangTidyOptions.h -clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp -clang-tools-extra/clang-tidy/ClangTidyProfiling.h -clang-tools-extra/clang-tidy/GlobList.cpp -clang-tools-extra/clang-tidy/GlobList.h -clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h -clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h -clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp -clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h -clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationAdditionCheck.h -clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationComparisonCheck.h -clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationConversionCastCheck.h -clang-tools-extra/clang-tidy/abseil/DurationDivisionCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationFactoryFloatCheck.h -clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.h -clang-tools-extra/clang-tidy/abseil/DurationRewriter.cpp -clang-tools-extra/clang-tidy/abseil/DurationRewriter.h -clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationSubtractionCheck.h -clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.cpp -clang-tools-extra/clang-tidy/abseil/DurationUnnecessaryConversionCheck.h -clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.cpp -clang-tools-extra/clang-tidy/abseil/FasterStrsplitDelimiterCheck.h -clang-tools-extra/clang-tidy/abseil/NoNamespaceCheck.h -clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp -clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.h -clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.cpp -clang-tools-extra/clang-tidy/abseil/StringFindStrContainsCheck.h -clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.cpp -clang-tools-extra/clang-tidy/abseil/TimeComparisonCheck.h -clang-tools-extra/clang-tidy/abseil/TimeSubtractionCheck.cpp -clang-tools-extra/clang-tidy/abseil/UpgradeDurationConversionsCheck.cpp -clang-tools-extra/clang-tidy/altera/AlteraTidyModule.cpp -clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.cpp -clang-tools-extra/clang-tidy/altera/IdDependentBackwardBranchCheck.h -clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp -clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.h -clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.cpp -clang-tools-extra/clang-tidy/altera/SingleWorkItemBarrierCheck.h -clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp -clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.h -clang-tools-extra/clang-tidy/android/CloexecAccept4Check.cpp -clang-tools-extra/clang-tidy/android/CloexecAccept4Check.h -clang-tools-extra/clang-tidy/android/CloexecAcceptCheck.h -clang-tools-extra/clang-tidy/android/CloexecCheck.h -clang-tools-extra/clang-tidy/android/CloexecCreatCheck.h -clang-tools-extra/clang-tidy/android/CloexecDupCheck.cpp -clang-tools-extra/clang-tidy/android/CloexecDupCheck.h -clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.cpp -clang-tools-extra/clang-tidy/android/CloexecEpollCreate1Check.h -clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.cpp -clang-tools-extra/clang-tidy/android/CloexecEpollCreateCheck.h -clang-tools-extra/clang-tidy/android/CloexecFopenCheck.h -clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.cpp -clang-tools-extra/clang-tidy/android/CloexecInotifyInit1Check.h -clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.cpp -clang-tools-extra/clang-tidy/android/CloexecInotifyInitCheck.h -clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.cpp -clang-tools-extra/clang-tidy/android/CloexecMemfdCreateCheck.h -clang-tools-extra/clang-tidy/android/CloexecOpenCheck.h -clang-tools-extra/clang-tidy/android/CloexecPipe2Check.h -clang-tools-extra/clang-tidy/android/CloexecPipeCheck.h -clang-tools-extra/clang-tidy/android/CloexecSocketCheck.h -clang-tools-extra/clang-tidy/android/ComparisonInTempFailureRetryCheck.h -clang-tools-extra/clang-tidy/boost/BoostTidyModule.cpp -clang-tools-extra/clang-tidy/boost/UseToStringCheck.cpp -clang-tools-extra/clang-tidy/boost/UseToStringCheck.h -clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h -clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp -clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h -clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp -clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h -clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.cpp -clang-tools-extra/clang-tidy/bugprone/BoolPointerImplicitConversionCheck.h -clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.cpp -clang-tools-extra/clang-tidy/bugprone/BranchCloneCheck.h -clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.cpp -clang-tools-extra/clang-tidy/bugprone/CopyConstructorInitCheck.h -clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.cpp -clang-tools-extra/clang-tidy/bugprone/DanglingHandleCheck.h -clang-tools-extra/clang-tidy/bugprone/DynamicStaticInitializersCheck.h -clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.cpp -clang-tools-extra/clang-tidy/bugprone/EasilySwappableParametersCheck.h -clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ExceptionEscapeCheck.h -clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.cpp -clang-tools-extra/clang-tidy/bugprone/FoldInitTypeCheck.h -clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ForwardDeclarationNamespaceCheck.h -clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ForwardingReferenceOverloadCheck.h -clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ImplicitWideningOfMultiplicationResultCheck.h -clang-tools-extra/clang-tidy/bugprone/InaccurateEraseCheck.h -clang-tools-extra/clang-tidy/bugprone/IncorrectRoundingsCheck.h -clang-tools-extra/clang-tidy/bugprone/InfiniteLoopCheck.h -clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.cpp -clang-tools-extra/clang-tidy/bugprone/IntegerDivisionCheck.h -clang-tools-extra/clang-tidy/bugprone/LambdaFunctionNameCheck.h -clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.cpp -clang-tools-extra/clang-tidy/bugprone/MacroParenthesesCheck.h -clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.cpp -clang-tools-extra/clang-tidy/bugprone/MacroRepeatedSideEffectsCheck.h -clang-tools-extra/clang-tidy/bugprone/MisplacedPointerArithmeticInAllocCheck.h -clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp -clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.h -clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.cpp -clang-tools-extra/clang-tidy/bugprone/MoveForwardingReferenceCheck.h -clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.cpp -clang-tools-extra/clang-tidy/bugprone/MultipleStatementMacroCheck.h -clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.cpp -clang-tools-extra/clang-tidy/bugprone/NoEscapeCheck.h -clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.cpp -clang-tools-extra/clang-tidy/bugprone/NotNullTerminatedResultCheck.h -clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ParentVirtualCallCheck.h -clang-tools-extra/clang-tidy/bugprone/PosixReturnCheck.cpp -clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h -clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h -clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h -clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h -clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.h -clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h -clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h -clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h -clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp -clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp -clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h -clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.cpp -clang-tools-extra/clang-tidy/bugprone/StringIntegerAssignmentCheck.h -clang-tools-extra/clang-tidy/bugprone/StringLiteralWithEmbeddedNulCheck.h -clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp -clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousEnumUsageCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SuspiciousIncludeCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SuspiciousMemoryComparisonCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousMemsetUsageCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SuspiciousMissingCommaCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousSemicolonCheck.h -clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SuspiciousStringCompareCheck.h -clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.cpp -clang-tools-extra/clang-tidy/bugprone/SwappedArgumentsCheck.h -clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.cpp -clang-tools-extra/clang-tidy/bugprone/TerminatingContinueCheck.h -clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.cpp -clang-tools-extra/clang-tidy/bugprone/ThrowKeywordMissingCheck.h -clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.cpp -clang-tools-extra/clang-tidy/bugprone/TooSmallLoopVariableCheck.h -clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.cpp -clang-tools-extra/clang-tidy/bugprone/UndefinedMemoryManipulationCheck.h -clang-tools-extra/clang-tidy/bugprone/UndelegatedConstructorCheck.h -clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.cpp -clang-tools-extra/clang-tidy/bugprone/UnhandledExceptionAtNewCheck.h -clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.cpp -clang-tools-extra/clang-tidy/bugprone/UnhandledSelfAssignmentCheck.h -clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.cpp -clang-tools-extra/clang-tidy/bugprone/UnusedRaiiCheck.h -clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.cpp -clang-tools-extra/clang-tidy/bugprone/UnusedReturnValueCheck.h -clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.h -clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.cpp -clang-tools-extra/clang-tidy/bugprone/VirtualNearMissCheck.h -clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.cpp -clang-tools-extra/clang-tidy/cert/CommandProcessorCheck.h -clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.cpp -clang-tools-extra/clang-tidy/cert/DefaultOperatorNewAlignmentCheck.h -clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.cpp -clang-tools-extra/clang-tidy/cert/DontModifyStdNamespaceCheck.h -clang-tools-extra/clang-tidy/cert/FloatLoopCounter.cpp -clang-tools-extra/clang-tidy/cert/FloatLoopCounter.h -clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.cpp -clang-tools-extra/clang-tidy/cert/LimitedRandomnessCheck.h -clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.cpp -clang-tools-extra/clang-tidy/cert/MutatingCopyCheck.h -clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.cpp -clang-tools-extra/clang-tidy/cert/NonTrivialTypesLibcMemoryCallsCheck.h -clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.cpp -clang-tools-extra/clang-tidy/cert/ProperlySeededRandomGeneratorCheck.h -clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.cpp -clang-tools-extra/clang-tidy/cert/SetLongJmpCheck.h -clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.cpp -clang-tools-extra/clang-tidy/cert/StaticObjectExceptionCheck.h -clang-tools-extra/clang-tidy/cert/StrToNumCheck.cpp -clang-tools-extra/clang-tidy/cert/StrToNumCheck.h -clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.cpp -clang-tools-extra/clang-tidy/cert/ThrownExceptionTypeCheck.h -clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.cpp -clang-tools-extra/clang-tidy/cert/VariadicFunctionDefCheck.h -clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.cpp -clang-tools-extra/clang-tidy/concurrency/MtUnsafeCheck.h -clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.cpp -clang-tools-extra/clang-tidy/concurrency/ThreadCanceltypeAsynchronousCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidGotoCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/AvoidNonConstGlobalVariablesCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/CppCoreGuidelinesTidyModule.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/NoMallocCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/OwningMemoryCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsArrayToPointerDecayCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsPointerArithmeticCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeConstCastCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeCstyleCastCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeReinterpretCastCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeStaticCastDowncastCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeUnionAccessCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeVarargCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/SlicingCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/SpecialMemberFunctionsCheck.h -clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.cpp -clang-tools-extra/clang-tidy/cppcoreguidelines/VirtualClassDestructorCheck.h -clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.cpp -clang-tools-extra/clang-tidy/darwin/DispatchOnceNonstaticCheck.h -clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.cpp -clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsCallsCheck.h -clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.cpp -clang-tools-extra/clang-tidy/fuchsia/DefaultArgumentsDeclarationsCheck.h -clang-tools-extra/clang-tidy/fuchsia/FuchsiaTidyModule.cpp -clang-tools-extra/clang-tidy/fuchsia/MultipleInheritanceCheck.h -clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.cpp -clang-tools-extra/clang-tidy/fuchsia/OverloadedOperatorCheck.h -clang-tools-extra/clang-tidy/fuchsia/StaticallyConstructedObjectsCheck.cpp -clang-tools-extra/clang-tidy/fuchsia/TrailingReturnCheck.cpp -clang-tools-extra/clang-tidy/google/AvoidCStyleCastsCheck.h -clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.cpp -clang-tools-extra/clang-tidy/google/AvoidNSObjectNewCheck.h -clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp -clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.h -clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.cpp -clang-tools-extra/clang-tidy/google/DefaultArgumentsCheck.h -clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.h -clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.cpp -clang-tools-extra/clang-tidy/google/ExplicitMakePairCheck.h -clang-tools-extra/clang-tidy/google/FunctionNamingCheck.cpp -clang-tools-extra/clang-tidy/google/FunctionNamingCheck.h -clang-tools-extra/clang-tidy/google/GlobalNamesInHeadersCheck.h -clang-tools-extra/clang-tidy/google/IntegerTypesCheck.h -clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.cpp -clang-tools-extra/clang-tidy/google/OverloadedUnaryAndCheck.h -clang-tools-extra/clang-tidy/google/TodoCommentCheck.cpp -clang-tools-extra/clang-tidy/google/TodoCommentCheck.h -clang-tools-extra/clang-tidy/google/UnnamedNamespaceInHeaderCheck.h -clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.cpp -clang-tools-extra/clang-tidy/google/UpgradeGoogletestCaseCheck.h -clang-tools-extra/clang-tidy/google/UsingNamespaceDirectiveCheck.h -clang-tools-extra/clang-tidy/hicpp/ExceptionBaseclassCheck.cpp -clang-tools-extra/clang-tidy/hicpp/HICPPTidyModule.cpp -clang-tools-extra/clang-tidy/hicpp/MultiwayPathsCoveredCheck.h -clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.cpp -clang-tools-extra/clang-tidy/hicpp/NoAssemblerCheck.h -clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.cpp -clang-tools-extra/clang-tidy/hicpp/SignedBitwiseCheck.h -clang-tools-extra/clang-tidy/linuxkernel/LinuxKernelTidyModule.cpp -clang-tools-extra/clang-tidy/linuxkernel/MustCheckErrsCheck.h -clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.cpp -clang-tools-extra/clang-tidy/llvm/HeaderGuardCheck.h -clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.cpp -clang-tools-extra/clang-tidy/llvm/IncludeOrderCheck.h -clang-tools-extra/clang-tidy/llvm/LLVMTidyModule.cpp -clang-tools-extra/clang-tidy/llvm/PreferIsaOrDynCastInConditionalsCheck.cpp -clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.cpp -clang-tools-extra/clang-tidy/llvm/PreferRegisterOverUnsignedCheck.h -clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.cpp -clang-tools-extra/clang-tidy/llvm/TwineLocalCheck.h -clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.cpp -clang-tools-extra/clang-tidy/llvmlibc/CalleeNamespaceCheck.h -clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp -clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.h -clang-tools-extra/clang-tidy/llvmlibc/LLVMLibcTidyModule.cpp -clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.cpp -clang-tools-extra/clang-tidy/llvmlibc/RestrictSystemLibcHeadersCheck.h -clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp -clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h -clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp -clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp -clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h -clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp -clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h -clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp -clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.h -clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.cpp -clang-tools-extra/clang-tidy/misc/NewDeleteOverloadsCheck.h -clang-tools-extra/clang-tidy/misc/NonCopyableObjects.h -clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.cpp -clang-tools-extra/clang-tidy/misc/NonPrivateMemberVariablesInClassesCheck.h -clang-tools-extra/clang-tidy/misc/NoRecursionCheck.cpp -clang-tools-extra/clang-tidy/misc/NoRecursionCheck.h -clang-tools-extra/clang-tidy/misc/RedundantExpressionCheck.h -clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp -clang-tools-extra/clang-tidy/misc/StaticAssertCheck.h -clang-tools-extra/clang-tidy/misc/ThrowByValueCatchByReferenceCheck.cpp -clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.cpp -clang-tools-extra/clang-tidy/misc/UnconventionalAssignOperatorCheck.h -clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.cpp -clang-tools-extra/clang-tidy/misc/UniqueptrResetReleaseCheck.h -clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.cpp -clang-tools-extra/clang-tidy/misc/UnusedAliasDeclsCheck.h -clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp -clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.h -clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp -clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.h -clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.cpp -clang-tools-extra/clang-tidy/modernize/AvoidBindCheck.h -clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.cpp -clang-tools-extra/clang-tidy/modernize/AvoidCArraysCheck.h -clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.cpp -clang-tools-extra/clang-tidy/modernize/ConcatNestedNamespacesCheck.h -clang-tools-extra/clang-tidy/modernize/DeprecatedHeadersCheck.h -clang-tools-extra/clang-tidy/modernize/DeprecatedIosBaseAliasesCheck.cpp -clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.h -clang-tools-extra/clang-tidy/modernize/LoopConvertUtils.h -clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.cpp -clang-tools-extra/clang-tidy/modernize/MakeSharedCheck.h -clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.h -clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.cpp -clang-tools-extra/clang-tidy/modernize/MakeUniqueCheck.h -clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp -clang-tools-extra/clang-tidy/modernize/PassByValueCheck.h -clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp -clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.h -clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.cpp -clang-tools-extra/clang-tidy/modernize/RedundantVoidArgCheck.h -clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.h -clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.cpp -clang-tools-extra/clang-tidy/modernize/ReplaceDisallowCopyAndAssignMacroCheck.h -clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp -clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.h -clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.cpp -clang-tools-extra/clang-tidy/modernize/ReturnBracedInitListCheck.h -clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.cpp -clang-tools-extra/clang-tidy/modernize/ShrinkToFitCheck.h -clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.cpp -clang-tools-extra/clang-tidy/modernize/UnaryStaticAssertCheck.h -clang-tools-extra/clang-tidy/modernize/UseAutoCheck.h -clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseBoolLiteralsCheck.h -clang-tools-extra/clang-tidy/modernize/UseDefaultMemberInitCheck.h -clang-tools-extra/clang-tidy/modernize/UseEmplaceCheck.h -clang-tools-extra/clang-tidy/modernize/UseEqualsDefaultCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseEqualsDeleteCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseNodiscardCheck.h -clang-tools-extra/clang-tidy/modernize/UseNoexceptCheck.h -clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h -clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h -clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h -clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp -clang-tools-extra/clang-tidy/modernize/UseUsingCheck.h -clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.cpp -clang-tools-extra/clang-tidy/mpi/BufferDerefCheck.h -clang-tools-extra/clang-tidy/mpi/MPITidyModule.cpp -clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.cpp -clang-tools-extra/clang-tidy/mpi/TypeMismatchCheck.h -clang-tools-extra/clang-tidy/objc/AssertEquals.cpp -clang-tools-extra/clang-tidy/objc/AssertEquals.h -clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.cpp -clang-tools-extra/clang-tidy/objc/DeallocInCategoryCheck.h -clang-tools-extra/clang-tidy/objc/ForbiddenSubclassingCheck.h -clang-tools-extra/clang-tidy/objc/MissingHashCheck.cpp -clang-tools-extra/clang-tidy/objc/MissingHashCheck.h -clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.cpp -clang-tools-extra/clang-tidy/objc/NSInvocationArgumentLifetimeCheck.h -clang-tools-extra/clang-tidy/objc/PropertyDeclarationCheck.h -clang-tools-extra/clang-tidy/objc/SuperSelfCheck.cpp -clang-tools-extra/clang-tidy/objc/SuperSelfCheck.h -clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.cpp -clang-tools-extra/clang-tidy/openmp/ExceptionEscapeCheck.h -clang-tools-extra/clang-tidy/openmp/OpenMPTidyModule.cpp -clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.cpp -clang-tools-extra/clang-tidy/openmp/UseDefaultNoneCheck.h -clang-tools-extra/clang-tidy/performance/FasterStringFindCheck.cpp -clang-tools-extra/clang-tidy/performance/ForRangeCopyCheck.cpp -clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.cpp -clang-tools-extra/clang-tidy/performance/InefficientAlgorithmCheck.h -clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.cpp -clang-tools-extra/clang-tidy/performance/InefficientStringConcatenationCheck.h -clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp -clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h -clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp -clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.h -clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.cpp -clang-tools-extra/clang-tidy/performance/NoAutomaticMoveCheck.h -clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.cpp -clang-tools-extra/clang-tidy/performance/NoexceptMoveConstructorCheck.h -clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.cpp -clang-tools-extra/clang-tidy/performance/NoIntToPtrCheck.h -clang-tools-extra/clang-tidy/performance/PerformanceTidyModule.cpp -clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.cpp -clang-tools-extra/clang-tidy/performance/TriviallyDestructibleCheck.h -clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp -clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.h -clang-tools-extra/clang-tidy/performance/UnnecessaryCopyInitialization.cpp -clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp -clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.h -clang-tools-extra/clang-tidy/plugin/ClangTidyPlugin.cpp -clang-tools-extra/clang-tidy/portability/PortabilityTidyModule.cpp -clang-tools-extra/clang-tidy/portability/RestrictSystemIncludesCheck.cpp -clang-tools-extra/clang-tidy/portability/SIMDIntrinsicsCheck.cpp -clang-tools-extra/clang-tidy/readability/AvoidConstParamsInDecls.h -clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp -clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h -clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp -clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp -clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h -clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp -clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h -clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h -clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.cpp -clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h -clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp -clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h -clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp -clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h -clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h -clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp -clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h -clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp -clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.h -clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.cpp -clang-tools-extra/clang-tidy/readability/IdentifierLengthCheck.h -clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp -clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h -clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp -clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.h -clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.h -clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.cpp -clang-tools-extra/clang-tidy/readability/IsolateDeclarationCheck.h -clang-tools-extra/clang-tidy/readability/MagicNumbersCheck.cpp -clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.cpp -clang-tools-extra/clang-tidy/readability/MakeMemberFunctionConstCheck.h -clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.cpp -clang-tools-extra/clang-tidy/readability/MisleadingIndentationCheck.h -clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.cpp -clang-tools-extra/clang-tidy/readability/MisplacedArrayIndexCheck.h -clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp -clang-tools-extra/clang-tidy/readability/NamedParameterCheck.h -clang-tools-extra/clang-tidy/readability/NamespaceCommentCheck.h -clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.cpp -clang-tools-extra/clang-tidy/readability/NonConstParameterCheck.h -clang-tools-extra/clang-tidy/readability/QualifiedAutoCheck.h -clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp -clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantAccessSpecifiersCheck.h -clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantControlFlowCheck.h -clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantDeclarationCheck.h -clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantMemberInitCheck.h -clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantPreprocessorCheck.h -clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantSmartptrGetCheck.h -clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h -clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp -clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h -clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp -clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h -clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprMatchers.h -clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp -clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp -clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h -clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp -clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.h -clang-tools-extra/clang-tidy/readability/StringCompareCheck.cpp -clang-tools-extra/clang-tidy/readability/StringCompareCheck.h -clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp -clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.h -clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.cpp -clang-tools-extra/clang-tidy/readability/UniqueptrDeleteReleaseCheck.h -clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp -clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.h -clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.cpp -clang-tools-extra/clang-tidy/readability/UseAnyOfAllOfCheck.h -clang-tools-extra/clang-tidy/tool/ClangTidyMain.h -clang-tools-extra/clang-tidy/tool/ClangTidyToolMain.cpp -clang-tools-extra/clang-tidy/utils/Aliasing.cpp -clang-tools-extra/clang-tidy/utils/Aliasing.h -clang-tools-extra/clang-tidy/utils/ASTUtils.cpp -clang-tools-extra/clang-tidy/utils/ASTUtils.h -clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.cpp -clang-tools-extra/clang-tidy/utils/DeclRefExprUtils.h -clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.h -clang-tools-extra/clang-tidy/utils/ExprSequence.cpp -clang-tools-extra/clang-tidy/utils/ExprSequence.h -clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.cpp -clang-tools-extra/clang-tidy/utils/FileExtensionsUtils.h -clang-tools-extra/clang-tidy/utils/FixItHintUtils.cpp -clang-tools-extra/clang-tidy/utils/FixItHintUtils.h -clang-tools-extra/clang-tidy/utils/HeaderGuard.cpp -clang-tools-extra/clang-tidy/utils/HeaderGuard.h -clang-tools-extra/clang-tidy/utils/IncludeInserter.cpp -clang-tools-extra/clang-tidy/utils/IncludeInserter.h -clang-tools-extra/clang-tidy/utils/IncludeSorter.h -clang-tools-extra/clang-tidy/utils/LexerUtils.h -clang-tools-extra/clang-tidy/utils/Matchers.h -clang-tools-extra/clang-tidy/utils/NamespaceAliaser.cpp -clang-tools-extra/clang-tidy/utils/NamespaceAliaser.h -clang-tools-extra/clang-tidy/utils/OptionsUtils.cpp -clang-tools-extra/clang-tidy/utils/OptionsUtils.h -clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp -clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.h -clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp -clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.h -clang-tools-extra/clang-tidy/utils/TypeTraits.h -clang-tools-extra/clang-tidy/utils/UsingInserter.cpp -clang-tools-extra/clang-tidy/utils/UsingInserter.h -clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.cpp -clang-tools-extra/clang-tidy/zircon/TemporaryObjectsCheck.h -clang-tools-extra/clang-tidy/zircon/ZirconTidyModule.cpp -clang-tools-extra/clangd/AST.cpp -clang-tools-extra/clangd/AST.h -clang-tools-extra/clangd/ASTSignals.cpp -clang-tools-extra/clangd/ASTSignals.h -clang-tools-extra/clangd/ClangdLSPServer.cpp -clang-tools-extra/clangd/ClangdLSPServer.h -clang-tools-extra/clangd/ClangdServer.h -clang-tools-extra/clangd/CodeComplete.cpp -clang-tools-extra/clangd/CodeComplete.h -clang-tools-extra/clangd/CodeCompletionStrings.h -clang-tools-extra/clangd/CollectMacros.cpp -clang-tools-extra/clangd/CollectMacros.h -clang-tools-extra/clangd/CompileCommands.cpp -clang-tools-extra/clangd/CompileCommands.h -clang-tools-extra/clangd/Compiler.cpp -clang-tools-extra/clangd/Compiler.h -clang-tools-extra/clangd/Config.cpp -clang-tools-extra/clangd/Config.h -clang-tools-extra/clangd/ConfigCompile.cpp -clang-tools-extra/clangd/ConfigFragment.h -clang-tools-extra/clangd/ConfigProvider.cpp -clang-tools-extra/clangd/ConfigProvider.h -clang-tools-extra/clangd/Diagnostics.cpp -clang-tools-extra/clangd/Diagnostics.h -clang-tools-extra/clangd/DraftStore.cpp -clang-tools-extra/clangd/DraftStore.h -clang-tools-extra/clangd/DumpAST.cpp -clang-tools-extra/clangd/DumpAST.h -clang-tools-extra/clangd/ExpectedTypes.cpp -clang-tools-extra/clangd/ExpectedTypes.h -clang-tools-extra/clangd/Feature.cpp -clang-tools-extra/clangd/Feature.h -clang-tools-extra/clangd/FeatureModule.cpp -clang-tools-extra/clangd/FeatureModule.h -clang-tools-extra/clangd/FileDistance.cpp -clang-tools-extra/clangd/FileDistance.h -clang-tools-extra/clangd/FindSymbols.cpp -clang-tools-extra/clangd/FindSymbols.h -clang-tools-extra/clangd/FindTarget.cpp -clang-tools-extra/clangd/FindTarget.h -clang-tools-extra/clangd/FS.h -clang-tools-extra/clangd/FuzzyMatch.cpp -clang-tools-extra/clangd/FuzzyMatch.h -clang-tools-extra/clangd/GlobalCompilationDatabase.cpp -clang-tools-extra/clangd/GlobalCompilationDatabase.h -clang-tools-extra/clangd/Headers.cpp -clang-tools-extra/clangd/Headers.h -clang-tools-extra/clangd/HeaderSourceSwitch.cpp -clang-tools-extra/clangd/HeaderSourceSwitch.h -clang-tools-extra/clangd/HeuristicResolver.cpp -clang-tools-extra/clangd/HeuristicResolver.h -clang-tools-extra/clangd/Hover.cpp -clang-tools-extra/clangd/Hover.h -clang-tools-extra/clangd/IncludeCleaner.cpp -clang-tools-extra/clangd/IncludeCleaner.h -clang-tools-extra/clangd/IncludeFixer.cpp -clang-tools-extra/clangd/InlayHints.h -clang-tools-extra/clangd/LSPBinder.h -clang-tools-extra/clangd/ParsedAST.cpp -clang-tools-extra/clangd/ParsedAST.h -clang-tools-extra/clangd/PathMapping.h -clang-tools-extra/clangd/Preamble.cpp -clang-tools-extra/clangd/Preamble.h -clang-tools-extra/clangd/Protocol.cpp -clang-tools-extra/clangd/Protocol.h -clang-tools-extra/clangd/Quality.cpp -clang-tools-extra/clangd/RIFF.cpp -clang-tools-extra/clangd/RIFF.h -clang-tools-extra/clangd/Selection.h -clang-tools-extra/clangd/SemanticHighlighting.h -clang-tools-extra/clangd/SemanticSelection.cpp -clang-tools-extra/clangd/SemanticSelection.h -clang-tools-extra/clangd/SourceCode.cpp -clang-tools-extra/clangd/SourceCode.h -clang-tools-extra/clangd/TidyProvider.cpp -clang-tools-extra/clangd/TidyProvider.h -clang-tools-extra/clangd/Transport.h -clang-tools-extra/clangd/TUScheduler.cpp -clang-tools-extra/clangd/TUScheduler.h -clang-tools-extra/clangd/URI.h -clang-tools-extra/clangd/XRefs.h -clang-tools-extra/clangd/benchmarks/IndexBenchmark.cpp -clang-tools-extra/clangd/fuzzer/clangd-fuzzer.cpp -clang-tools-extra/clangd/fuzzer/FuzzerClangdMain.cpp -clang-tools-extra/clangd/index/Background.cpp -clang-tools-extra/clangd/index/Background.h -clang-tools-extra/clangd/index/BackgroundIndexLoader.cpp -clang-tools-extra/clangd/index/BackgroundIndexLoader.h -clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp -clang-tools-extra/clangd/index/BackgroundQueue.cpp -clang-tools-extra/clangd/index/BackgroundRebuild.cpp -clang-tools-extra/clangd/index/BackgroundRebuild.h -clang-tools-extra/clangd/index/CanonicalIncludes.cpp -clang-tools-extra/clangd/index/CanonicalIncludes.h -clang-tools-extra/clangd/index/FileIndex.cpp -clang-tools-extra/clangd/index/FileIndex.h -clang-tools-extra/clangd/index/Index.cpp -clang-tools-extra/clangd/index/Index.h -clang-tools-extra/clangd/index/IndexAction.cpp -clang-tools-extra/clangd/index/IndexAction.h -clang-tools-extra/clangd/index/MemIndex.h -clang-tools-extra/clangd/index/Merge.cpp -clang-tools-extra/clangd/index/Merge.h -clang-tools-extra/clangd/index/ProjectAware.cpp -clang-tools-extra/clangd/index/ProjectAware.h -clang-tools-extra/clangd/index/Ref.cpp -clang-tools-extra/clangd/index/Ref.h -clang-tools-extra/clangd/index/Relation.cpp -clang-tools-extra/clangd/index/Relation.h -clang-tools-extra/clangd/index/Serialization.cpp -clang-tools-extra/clangd/index/Serialization.h -clang-tools-extra/clangd/index/Symbol.cpp -clang-tools-extra/clangd/index/Symbol.h -clang-tools-extra/clangd/index/SymbolCollector.cpp -clang-tools-extra/clangd/index/SymbolID.cpp -clang-tools-extra/clangd/index/SymbolLocation.cpp -clang-tools-extra/clangd/index/SymbolLocation.h -clang-tools-extra/clangd/index/SymbolOrigin.cpp -clang-tools-extra/clangd/index/SymbolOrigin.h -clang-tools-extra/clangd/index/YAMLSerialization.cpp -clang-tools-extra/clangd/index/dex/Iterator.cpp -clang-tools-extra/clangd/index/dex/Iterator.h -clang-tools-extra/clangd/index/dex/PostingList.cpp -clang-tools-extra/clangd/index/dex/PostingList.h -clang-tools-extra/clangd/index/dex/Token.h -clang-tools-extra/clangd/index/dex/Trigram.cpp -clang-tools-extra/clangd/index/dex/Trigram.h -clang-tools-extra/clangd/index/dex/dexp/Dexp.cpp -clang-tools-extra/clangd/index/remote/Client.cpp -clang-tools-extra/clangd/index/remote/Client.h -clang-tools-extra/clangd/index/remote/marshalling/Marshalling.cpp -clang-tools-extra/clangd/index/remote/marshalling/Marshalling.h -clang-tools-extra/clangd/index/remote/monitor/Monitor.cpp -clang-tools-extra/clangd/index/remote/server/Server.cpp -clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp -clang-tools-extra/clangd/indexer/IndexerMain.cpp -clang-tools-extra/clangd/refactor/InsertionPoint.cpp -clang-tools-extra/clangd/refactor/InsertionPoint.h -clang-tools-extra/clangd/refactor/Rename.h -clang-tools-extra/clangd/refactor/Tweak.cpp -clang-tools-extra/clangd/refactor/Tweak.h -clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp -clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp -clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp -clang-tools-extra/clangd/refactor/tweaks/DefineOutline.cpp -clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp -clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp -clang-tools-extra/clangd/refactor/tweaks/ExtractFunction.cpp -clang-tools-extra/clangd/refactor/tweaks/ObjCLocalizeStringLiteral.cpp -clang-tools-extra/clangd/refactor/tweaks/RemoveUsingNamespace.cpp -clang-tools-extra/clangd/refactor/tweaks/SwapIfBranches.cpp -clang-tools-extra/clangd/support/Cancellation.cpp -clang-tools-extra/clangd/support/Cancellation.h -clang-tools-extra/clangd/support/Context.cpp -clang-tools-extra/clangd/support/Context.h -clang-tools-extra/clangd/support/FileCache.cpp -clang-tools-extra/clangd/support/FileCache.h -clang-tools-extra/clangd/support/Function.h -clang-tools-extra/clangd/support/Logger.cpp -clang-tools-extra/clangd/support/Markup.cpp -clang-tools-extra/clangd/support/Markup.h -clang-tools-extra/clangd/support/MemoryTree.cpp -clang-tools-extra/clangd/support/MemoryTree.h -clang-tools-extra/clangd/support/Path.cpp -clang-tools-extra/clangd/support/Path.h -clang-tools-extra/clangd/support/Shutdown.cpp -clang-tools-extra/clangd/support/Shutdown.h -clang-tools-extra/clangd/support/ThreadCrashReporter.cpp -clang-tools-extra/clangd/support/ThreadCrashReporter.h -clang-tools-extra/clangd/support/Threading.cpp -clang-tools-extra/clangd/support/Threading.h -clang-tools-extra/clangd/support/ThreadsafeFS.cpp -clang-tools-extra/clangd/support/ThreadsafeFS.h -clang-tools-extra/clangd/support/Trace.cpp -clang-tools-extra/clangd/support/Trace.h -clang-tools-extra/clangd/tool/Check.cpp -clang-tools-extra/clangd/tool/ClangdMain.cpp -clang-tools-extra/clangd/unittests/Annotations.cpp -clang-tools-extra/clangd/unittests/Annotations.h -clang-tools-extra/clangd/unittests/ASTSignalsTests.cpp -clang-tools-extra/clangd/unittests/ASTTests.cpp -clang-tools-extra/clangd/unittests/BackgroundIndexTests.cpp -clang-tools-extra/clangd/unittests/CallHierarchyTests.cpp -clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp -clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp -clang-tools-extra/clangd/unittests/ClangdTests.cpp -clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp -clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp -clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp -clang-tools-extra/clangd/unittests/CompilerTests.cpp -clang-tools-extra/clangd/unittests/ConfigCompileTests.cpp -clang-tools-extra/clangd/unittests/ConfigProviderTests.cpp -clang-tools-extra/clangd/unittests/ConfigTesting.h -clang-tools-extra/clangd/unittests/ConfigYAMLTests.cpp -clang-tools-extra/clangd/unittests/DecisionForestTests.cpp -clang-tools-extra/clangd/unittests/DexTests.cpp -clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp -clang-tools-extra/clangd/unittests/DraftStoreTests.cpp -clang-tools-extra/clangd/unittests/DumpASTTests.cpp -clang-tools-extra/clangd/unittests/ExpectedTypeTest.cpp -clang-tools-extra/clangd/unittests/FeatureModulesTests.cpp -clang-tools-extra/clangd/unittests/FileDistanceTests.cpp -clang-tools-extra/clangd/unittests/FileIndexTests.cpp -clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp -clang-tools-extra/clangd/unittests/FindTargetTests.cpp -clang-tools-extra/clangd/unittests/FSTests.cpp -clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp -clang-tools-extra/clangd/unittests/GlobalCompilationDatabaseTests.cpp -clang-tools-extra/clangd/unittests/HeadersTests.cpp -clang-tools-extra/clangd/unittests/HoverTests.cpp -clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp -clang-tools-extra/clangd/unittests/IndexActionTests.cpp -clang-tools-extra/clangd/unittests/InlayHintTests.cpp -clang-tools-extra/clangd/unittests/InsertionPointTests.cpp -clang-tools-extra/clangd/unittests/LoggerTests.cpp -clang-tools-extra/clangd/unittests/LSPBinderTests.cpp -clang-tools-extra/clangd/unittests/LSPClient.cpp -clang-tools-extra/clangd/unittests/ModulesTests.cpp -clang-tools-extra/clangd/unittests/ParsedASTTests.cpp -clang-tools-extra/clangd/unittests/PreambleTests.cpp -clang-tools-extra/clangd/unittests/PrintASTTests.cpp -clang-tools-extra/clangd/unittests/ProjectAwareIndexTests.cpp -clang-tools-extra/clangd/unittests/QualityTests.cpp -clang-tools-extra/clangd/unittests/RIFFTests.cpp -clang-tools-extra/clangd/unittests/SelectionTests.cpp -clang-tools-extra/clangd/unittests/SemanticSelectionTests.cpp -clang-tools-extra/clangd/unittests/SerializationTests.cpp -clang-tools-extra/clangd/unittests/SourceCodeTests.cpp -clang-tools-extra/clangd/unittests/SymbolInfoTests.cpp -clang-tools-extra/clangd/unittests/SyncAPI.cpp -clang-tools-extra/clangd/unittests/SyncAPI.h -clang-tools-extra/clangd/unittests/TestFS.cpp -clang-tools-extra/clangd/unittests/TestFS.h -clang-tools-extra/clangd/unittests/TestIndex.cpp -clang-tools-extra/clangd/unittests/TestIndex.h -clang-tools-extra/clangd/unittests/TestTU.cpp -clang-tools-extra/clangd/unittests/TestTU.h -clang-tools-extra/clangd/unittests/TestWorkspace.cpp -clang-tools-extra/clangd/unittests/TestWorkspace.h -clang-tools-extra/clangd/unittests/ThreadCrashReporterTests.cpp -clang-tools-extra/clangd/unittests/TidyProviderTests.cpp -clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp -clang-tools-extra/clangd/unittests/URITests.cpp -clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h -clang-tools-extra/clangd/unittests/remote/MarshallingTests.cpp -clang-tools-extra/clangd/unittests/support/CancellationTests.cpp -clang-tools-extra/clangd/unittests/support/ContextTests.cpp -clang-tools-extra/clangd/unittests/support/FileCacheTests.cpp -clang-tools-extra/clangd/unittests/support/FunctionTests.cpp -clang-tools-extra/clangd/unittests/support/MarkupTests.cpp -clang-tools-extra/clangd/unittests/support/MemoryTreeTests.cpp -clang-tools-extra/clangd/unittests/support/PathTests.cpp -clang-tools-extra/clangd/unittests/support/TestTracer.cpp -clang-tools-extra/clangd/unittests/support/TestTracer.h -clang-tools-extra/clangd/unittests/support/ThreadingTests.cpp -clang-tools-extra/clangd/unittests/support/TraceTests.cpp -clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp -clang-tools-extra/clangd/unittests/tweaks/AnnotateHighlightingsTests.cpp -clang-tools-extra/clangd/unittests/tweaks/DefineOutlineTests.cpp -clang-tools-extra/clangd/unittests/tweaks/DumpASTTests.cpp -clang-tools-extra/clangd/unittests/tweaks/DumpRecordLayoutTests.cpp -clang-tools-extra/clangd/unittests/tweaks/DumpSymbolTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ExpandDeducedTypeTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ExpandMacroTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ExtractFunctionTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ExtractVariableTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ObjCLocalizeStringLiteralTests.cpp -clang-tools-extra/clangd/unittests/tweaks/PopulateSwitchTests.cpp -clang-tools-extra/clangd/unittests/tweaks/RawStringLiteralTests.cpp -clang-tools-extra/clangd/unittests/tweaks/RemoveUsingNamespaceTests.cpp -clang-tools-extra/clangd/unittests/tweaks/ShowSelectionTreeTests.cpp -clang-tools-extra/clangd/unittests/tweaks/SwapIfBranchesTests.cpp -clang-tools-extra/clangd/unittests/tweaks/TweakTesting.cpp -clang-tools-extra/clangd/unittests/tweaks/TweakTesting.h -clang-tools-extra/clangd/unittests/tweaks/TweakTests.cpp -clang-tools-extra/clangd/unittests/xpc/ConversionTests.cpp -clang-tools-extra/clangd/xpc/Conversion.cpp -clang-tools-extra/clangd/xpc/Conversion.h -clang-tools-extra/clangd/xpc/XPCTransport.cpp -clang-tools-extra/clangd/xpc/framework/ClangdXPC.cpp -clang-tools-extra/clangd/xpc/test-client/ClangdXPCTestClient.cpp -clang-tools-extra/modularize/Modularize.h -clang-tools-extra/pp-trace/PPTrace.cpp -clang-tools-extra/tool-template/ToolTemplate.cpp -clang-tools-extra/unittests/clang-apply-replacements/ApplyReplacementsTest.cpp -clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp -clang-tools-extra/unittests/clang-doc/ClangDocTest.cpp -clang-tools-extra/unittests/clang-doc/ClangDocTest.h -clang-tools-extra/unittests/clang-doc/GeneratorTest.cpp -clang-tools-extra/unittests/clang-doc/HTMLGeneratorTest.cpp -clang-tools-extra/unittests/clang-doc/MDGeneratorTest.cpp -clang-tools-extra/unittests/clang-doc/MergeTest.cpp -clang-tools-extra/unittests/clang-doc/SerializeTest.cpp -clang-tools-extra/unittests/clang-doc/YAMLGeneratorTest.cpp -clang-tools-extra/unittests/clang-tidy/AddConstTest.cpp -clang-tools-extra/unittests/clang-tidy/ClangTidyDiagnosticConsumerTest.cpp -clang-tools-extra/unittests/clang-tidy/ClangTidyTest.h -clang-tools-extra/unittests/clang-tidy/DeclRefExprUtilsTest.cpp -clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp -clang-tools-extra/unittests/clang-tidy/OptionsProviderTest.cpp -clang-tools-extra/unittests/clang-tidy/OverlappingReplacementsTest.cpp -clang-tools-extra/unittests/clang-tidy/ReadabilityModuleTest.cpp -clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp -compiler-rt/include/sanitizer/linux_syscall_hooks.h -compiler-rt/include/sanitizer/memprof_interface.h -compiler-rt/include/sanitizer/netbsd_syscall_hooks.h -compiler-rt/include/xray/xray_interface.h -compiler-rt/include/xray/xray_log_interface.h -compiler-rt/lib/asan/asan_activation.h -compiler-rt/lib/asan/asan_lock.h -compiler-rt/lib/asan/asan_mapping.h -compiler-rt/lib/asan/asan_mapping_sparc64.h -compiler-rt/lib/asan/asan_rtl_static.cpp -compiler-rt/lib/asan/tests/asan_globals_test.cpp -compiler-rt/lib/builtins/fp_extend.h -compiler-rt/lib/builtins/fp_lib.h -compiler-rt/lib/builtins/fp_mode.h -compiler-rt/lib/builtins/fp_trunc.h -compiler-rt/lib/builtins/int_endianness.h -compiler-rt/lib/builtins/int_math.h -compiler-rt/lib/builtins/int_types.h -compiler-rt/lib/builtins/int_util.h -compiler-rt/lib/builtins/unwind-ehabi-helpers.h -compiler-rt/lib/builtins/ppc/DD.h -compiler-rt/lib/dfsan/dfsan_allocator.cpp -compiler-rt/lib/dfsan/dfsan_allocator.h -compiler-rt/lib/dfsan/dfsan_chained_origin_depot.cpp -compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h -compiler-rt/lib/dfsan/dfsan_flags.h -compiler-rt/lib/dfsan/dfsan_interceptors.cpp -compiler-rt/lib/dfsan/dfsan_origin.h -compiler-rt/lib/dfsan/dfsan_platform.h -compiler-rt/lib/dfsan/dfsan_thread.h -compiler-rt/lib/fuzzer/FuzzerCommand.h -compiler-rt/lib/fuzzer/FuzzerExtFunctions.h -compiler-rt/lib/fuzzer/FuzzerExtFunctionsDlsym.cpp -compiler-rt/lib/fuzzer/FuzzerExtFunctionsWeak.cpp -compiler-rt/lib/fuzzer/FuzzerExtraCountersDarwin.cpp -compiler-rt/lib/fuzzer/FuzzerExtraCountersWindows.cpp -compiler-rt/lib/fuzzer/FuzzerFork.h -compiler-rt/lib/fuzzer/FuzzerInterceptors.cpp -compiler-rt/lib/fuzzer/FuzzerPlatform.h -compiler-rt/lib/fuzzer/tests/FuzzedDataProviderUnittest.cpp -compiler-rt/lib/gwp_asan/common.cpp -compiler-rt/lib/gwp_asan/common.h -compiler-rt/lib/gwp_asan/crash_handler.cpp -compiler-rt/lib/gwp_asan/crash_handler.h -compiler-rt/lib/gwp_asan/definitions.h -compiler-rt/lib/gwp_asan/guarded_pool_allocator.cpp -compiler-rt/lib/gwp_asan/guarded_pool_allocator.h -compiler-rt/lib/gwp_asan/mutex.h -compiler-rt/lib/gwp_asan/options.h -compiler-rt/lib/gwp_asan/stack_trace_compressor.cpp -compiler-rt/lib/gwp_asan/stack_trace_compressor.h -compiler-rt/lib/gwp_asan/utilities.h -compiler-rt/lib/gwp_asan/optional/backtrace.h -compiler-rt/lib/gwp_asan/optional/backtrace_fuchsia.cpp -compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp -compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp -compiler-rt/lib/gwp_asan/optional/options_parser.cpp -compiler-rt/lib/gwp_asan/optional/options_parser.h -compiler-rt/lib/gwp_asan/optional/printf.h -compiler-rt/lib/gwp_asan/optional/segv_handler.h -compiler-rt/lib/gwp_asan/optional/segv_handler_fuchsia.cpp -compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp -compiler-rt/lib/gwp_asan/platform_specific/common_fuchsia.cpp -compiler-rt/lib/gwp_asan/platform_specific/common_posix.cpp -compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_fuchsia.cpp -compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_fuchsia.h -compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.cpp -compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_posix.h -compiler-rt/lib/gwp_asan/platform_specific/guarded_pool_allocator_tls.h -compiler-rt/lib/gwp_asan/platform_specific/mutex_fuchsia.cpp -compiler-rt/lib/gwp_asan/platform_specific/mutex_fuchsia.h -compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.cpp -compiler-rt/lib/gwp_asan/platform_specific/mutex_posix.h -compiler-rt/lib/gwp_asan/platform_specific/utilities_fuchsia.cpp -compiler-rt/lib/gwp_asan/platform_specific/utilities_posix.cpp -compiler-rt/lib/gwp_asan/tests/backtrace.cpp -compiler-rt/lib/gwp_asan/tests/basic.cpp -compiler-rt/lib/gwp_asan/tests/compression.cpp -compiler-rt/lib/gwp_asan/tests/crash_handler_api.cpp -compiler-rt/lib/gwp_asan/tests/driver.cpp -compiler-rt/lib/gwp_asan/tests/enable_disable.cpp -compiler-rt/lib/gwp_asan/tests/harness.cpp -compiler-rt/lib/gwp_asan/tests/harness.h -compiler-rt/lib/gwp_asan/tests/iterate.cpp -compiler-rt/lib/gwp_asan/tests/late_init.cpp -compiler-rt/lib/gwp_asan/tests/mutex_test.cpp -compiler-rt/lib/gwp_asan/tests/options.cpp -compiler-rt/lib/gwp_asan/tests/slot_reuse.cpp -compiler-rt/lib/gwp_asan/tests/thread_contention.cpp -compiler-rt/lib/gwp_asan/tests/platform_specific/printf_sanitizer_common.cpp -compiler-rt/lib/hwasan/hwasan_checks.h -compiler-rt/lib/hwasan/hwasan_dynamic_shadow.h -compiler-rt/lib/hwasan/hwasan_flags.h -compiler-rt/lib/hwasan/hwasan_globals.cpp -compiler-rt/lib/hwasan/hwasan_globals.h -compiler-rt/lib/hwasan/hwasan_linux.cpp -compiler-rt/lib/hwasan/hwasan_poisoning.cpp -compiler-rt/lib/hwasan/hwasan_poisoning.h -compiler-rt/lib/hwasan/hwasan_preinit.cpp -compiler-rt/lib/interception/interception_mac.cpp -compiler-rt/lib/interception/tests/interception_test_main.cpp -compiler-rt/lib/lsan/lsan.h -compiler-rt/lib/lsan/lsan_common.cpp -compiler-rt/lib/lsan/lsan_thread.cpp -compiler-rt/lib/lsan/lsan_thread.h -compiler-rt/lib/memprof/memprof_allocator.cpp -compiler-rt/lib/memprof/memprof_allocator.h -compiler-rt/lib/memprof/memprof_descriptions.cpp -compiler-rt/lib/memprof/memprof_descriptions.h -compiler-rt/lib/memprof/memprof_flags.cpp -compiler-rt/lib/memprof/memprof_flags.h -compiler-rt/lib/memprof/memprof_init_version.h -compiler-rt/lib/memprof/memprof_interceptors.cpp -compiler-rt/lib/memprof/memprof_interceptors.h -compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.cpp -compiler-rt/lib/memprof/memprof_interceptors_memintrinsics.h -compiler-rt/lib/memprof/memprof_interface_internal.h -compiler-rt/lib/memprof/memprof_internal.h -compiler-rt/lib/memprof/memprof_linux.cpp -compiler-rt/lib/memprof/memprof_malloc_linux.cpp -compiler-rt/lib/memprof/memprof_mibmap.cpp -compiler-rt/lib/memprof/memprof_mibmap.h -compiler-rt/lib/memprof/memprof_posix.cpp -compiler-rt/lib/memprof/memprof_preinit.cpp -compiler-rt/lib/memprof/memprof_rawprofile.cpp -compiler-rt/lib/memprof/memprof_rawprofile.h -compiler-rt/lib/memprof/memprof_rtl.cpp -compiler-rt/lib/memprof/memprof_shadow_setup.cpp -compiler-rt/lib/memprof/memprof_stack.cpp -compiler-rt/lib/memprof/memprof_stack.h -compiler-rt/lib/memprof/memprof_stats.cpp -compiler-rt/lib/memprof/memprof_stats.h -compiler-rt/lib/memprof/memprof_thread.cpp -compiler-rt/lib/memprof/memprof_thread.h -compiler-rt/lib/memprof/tests/driver.cpp -compiler-rt/lib/memprof/tests/rawprofile.cpp -compiler-rt/lib/msan/msan_chained_origin_depot.h -compiler-rt/lib/msan/msan_flags.h -compiler-rt/lib/msan/msan_poisoning.h -compiler-rt/lib/msan/msan_report.h -compiler-rt/lib/orc/adt.h -compiler-rt/lib/orc/debug.h -compiler-rt/lib/orc/elfnix_platform.cpp -compiler-rt/lib/orc/elfnix_platform.h -compiler-rt/lib/orc/endianness.h -compiler-rt/lib/orc/error.h -compiler-rt/lib/orc/executor_address.h -compiler-rt/lib/orc/extensible_rtti.cpp -compiler-rt/lib/orc/extensible_rtti.h -compiler-rt/lib/orc/log_error_to_stderr.cpp -compiler-rt/lib/orc/macho_ehframe_registration.cpp -compiler-rt/lib/orc/macho_platform.cpp -compiler-rt/lib/orc/macho_platform.h -compiler-rt/lib/orc/run_program_wrapper.cpp -compiler-rt/lib/orc/simple_packed_serialization.h -compiler-rt/lib/orc/wrapper_function_utils.h -compiler-rt/lib/orc/unittests/adt_test.cpp -compiler-rt/lib/orc/unittests/c_api_test.cpp -compiler-rt/lib/orc/unittests/endian_test.cpp -compiler-rt/lib/orc/unittests/error_test.cpp -compiler-rt/lib/orc/unittests/executor_address_test.cpp -compiler-rt/lib/orc/unittests/extensible_rtti_test.cpp -compiler-rt/lib/orc/unittests/orc_unit_test_main.cpp -compiler-rt/lib/orc/unittests/simple_packed_serialization_test.cpp -compiler-rt/lib/orc/unittests/wrapper_function_utils_test.cpp -compiler-rt/lib/safestack/safestack_util.h -compiler-rt/lib/sanitizer_common/sancov_flags.h -compiler-rt/lib/sanitizer_common/sanitizer_allocator_dlsym.h -compiler-rt/lib/sanitizer_common/sanitizer_allocator_report.h -compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.cpp -compiler-rt/lib/sanitizer_common/sanitizer_chained_origin_depot.h -compiler-rt/lib/sanitizer_common/sanitizer_dense_map.h -compiler-rt/lib/sanitizer_common/sanitizer_dense_map_info.h -compiler-rt/lib/sanitizer_common/sanitizer_errno.h -compiler-rt/lib/sanitizer_common/sanitizer_errno_codes.h -compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h -compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp -compiler-rt/lib/sanitizer_common/sanitizer_leb128.h -compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h -compiler-rt/lib/sanitizer_common/sanitizer_lzw.h -compiler-rt/lib/sanitizer_common/sanitizer_placement_new.h -compiler-rt/lib/sanitizer_common/sanitizer_platform.h -compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_openbsd.cpp -compiler-rt/lib/sanitizer_common/sanitizer_platform_limits_openbsd.h -compiler-rt/lib/sanitizer_common/sanitizer_stacktrace_printer.h -compiler-rt/lib/sanitizer_common/sanitizer_stack_store.cpp -compiler-rt/lib/sanitizer_common/sanitizer_stack_store.h -compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_fuchsia.h -compiler-rt/lib/sanitizer_common/sanitizer_stoptheworld_win.cpp -compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup_constants.h -compiler-rt/lib/sanitizer_common/sanitizer_thread_safety.h -compiler-rt/lib/sanitizer_common/sanitizer_tls_get_addr.h -compiler-rt/lib/sanitizer_common/sanitizer_type_traits.cpp -compiler-rt/lib/sanitizer_common/sanitizer_type_traits.h -compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp -compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_wrappers.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_addrhashmap_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_chained_origin_depot_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_dense_map_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_flat_map_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_hash_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_leb128_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_lzw_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_stackdepot_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_stack_store_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_stoptheworld_test.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_test_main.cpp -compiler-rt/lib/sanitizer_common/tests/sanitizer_type_traits_test.cpp -compiler-rt/lib/scudo/standalone/allocator_config.h -compiler-rt/lib/scudo/standalone/atomic_helpers.h -compiler-rt/lib/scudo/standalone/bytemap.h -compiler-rt/lib/scudo/standalone/checksum.cpp -compiler-rt/lib/scudo/standalone/checksum.h -compiler-rt/lib/scudo/standalone/chunk.h -compiler-rt/lib/scudo/standalone/combined.h -compiler-rt/lib/scudo/standalone/common.cpp -compiler-rt/lib/scudo/standalone/common.h -compiler-rt/lib/scudo/standalone/crc32_hw.cpp -compiler-rt/lib/scudo/standalone/flags.cpp -compiler-rt/lib/scudo/standalone/flags.h -compiler-rt/lib/scudo/standalone/flags_parser.cpp -compiler-rt/lib/scudo/standalone/flags_parser.h -compiler-rt/lib/scudo/standalone/fuchsia.cpp -compiler-rt/lib/scudo/standalone/fuchsia.h -compiler-rt/lib/scudo/standalone/internal_defs.h -compiler-rt/lib/scudo/standalone/linux.cpp -compiler-rt/lib/scudo/standalone/linux.h -compiler-rt/lib/scudo/standalone/list.h -compiler-rt/lib/scudo/standalone/local_cache.h -compiler-rt/lib/scudo/standalone/memtag.h -compiler-rt/lib/scudo/standalone/mutex.h -compiler-rt/lib/scudo/standalone/options.h -compiler-rt/lib/scudo/standalone/platform.h -compiler-rt/lib/scudo/standalone/primary32.h -compiler-rt/lib/scudo/standalone/primary64.h -compiler-rt/lib/scudo/standalone/quarantine.h -compiler-rt/lib/scudo/standalone/release.cpp -compiler-rt/lib/scudo/standalone/release.h -compiler-rt/lib/scudo/standalone/report.cpp -compiler-rt/lib/scudo/standalone/report.h -compiler-rt/lib/scudo/standalone/secondary.h -compiler-rt/lib/scudo/standalone/size_class_map.h -compiler-rt/lib/scudo/standalone/stack_depot.h -compiler-rt/lib/scudo/standalone/stats.h -compiler-rt/lib/scudo/standalone/string_utils.cpp -compiler-rt/lib/scudo/standalone/string_utils.h -compiler-rt/lib/scudo/standalone/trusty.cpp -compiler-rt/lib/scudo/standalone/trusty.h -compiler-rt/lib/scudo/standalone/tsd.h -compiler-rt/lib/scudo/standalone/tsd_exclusive.h -compiler-rt/lib/scudo/standalone/tsd_shared.h -compiler-rt/lib/scudo/standalone/vector.h -compiler-rt/lib/scudo/standalone/wrappers_c.cpp -compiler-rt/lib/scudo/standalone/wrappers_c.h -compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp -compiler-rt/lib/scudo/standalone/wrappers_c_checks.h -compiler-rt/lib/scudo/standalone/benchmarks/malloc_benchmark.cpp -compiler-rt/lib/scudo/standalone/fuzz/get_error_info_fuzzer.cpp -compiler-rt/lib/scudo/standalone/include/scudo/interface.h -compiler-rt/lib/scudo/standalone/tests/atomic_test.cpp -compiler-rt/lib/scudo/standalone/tests/bytemap_test.cpp -compiler-rt/lib/scudo/standalone/tests/checksum_test.cpp -compiler-rt/lib/scudo/standalone/tests/chunk_test.cpp -compiler-rt/lib/scudo/standalone/tests/combined_test.cpp -compiler-rt/lib/scudo/standalone/tests/common_test.cpp -compiler-rt/lib/scudo/standalone/tests/flags_test.cpp -compiler-rt/lib/scudo/standalone/tests/list_test.cpp -compiler-rt/lib/scudo/standalone/tests/map_test.cpp -compiler-rt/lib/scudo/standalone/tests/memtag_test.cpp -compiler-rt/lib/scudo/standalone/tests/mutex_test.cpp -compiler-rt/lib/scudo/standalone/tests/primary_test.cpp -compiler-rt/lib/scudo/standalone/tests/quarantine_test.cpp -compiler-rt/lib/scudo/standalone/tests/release_test.cpp -compiler-rt/lib/scudo/standalone/tests/report_test.cpp -compiler-rt/lib/scudo/standalone/tests/scudo_unit_test.h -compiler-rt/lib/scudo/standalone/tests/scudo_unit_test_main.cpp -compiler-rt/lib/scudo/standalone/tests/secondary_test.cpp -compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp -compiler-rt/lib/scudo/standalone/tests/stats_test.cpp -compiler-rt/lib/scudo/standalone/tests/strings_test.cpp -compiler-rt/lib/scudo/standalone/tests/vector_test.cpp -compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp -compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp -compiler-rt/lib/scudo/standalone/tools/compute_size_class_config.cpp -compiler-rt/lib/tsan/rtl/tsan_fd.h -compiler-rt/lib/tsan/rtl/tsan_ignoreset.h -compiler-rt/lib/tsan/rtl/tsan_ilist.h -compiler-rt/lib/tsan/rtl/tsan_interface_ann.h -compiler-rt/lib/tsan/rtl/tsan_mman.h -compiler-rt/lib/tsan/rtl/tsan_mutexset.h -compiler-rt/lib/tsan/rtl/tsan_ppc_regs.h -compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp -compiler-rt/lib/tsan/rtl/tsan_shadow.h -compiler-rt/lib/tsan/rtl/tsan_stack_trace.h -compiler-rt/lib/tsan/rtl/tsan_suppressions.h -compiler-rt/lib/tsan/rtl/tsan_symbolize.h -compiler-rt/lib/tsan/rtl/tsan_trace.h -compiler-rt/lib/tsan/rtl/tsan_vector_clock.h -compiler-rt/lib/tsan/rtl-old/tsan_fd.h -compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h -compiler-rt/lib/tsan/rtl-old/tsan_ilist.h -compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h -compiler-rt/lib/tsan/rtl-old/tsan_mman.h -compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h -compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h -compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp -compiler-rt/lib/tsan/rtl-old/tsan_shadow.h -compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h -compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h -compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h -compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h -compiler-rt/lib/tsan/tests/unit/tsan_ilist_test.cpp -compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp -compiler-rt/lib/tsan/tests/unit/tsan_vector_clock_test.cpp -compiler-rt/lib/ubsan/ubsan_init_standalone_preinit.cpp -compiler-rt/lib/ubsan/ubsan_monitor.cpp -compiler-rt/lib/ubsan/ubsan_monitor.h -compiler-rt/lib/ubsan/ubsan_signals_standalone.h -compiler-rt/lib/ubsan/ubsan_win_dll_thunk.cpp -compiler-rt/lib/ubsan/ubsan_win_dynamic_runtime_thunk.cpp -compiler-rt/lib/ubsan/ubsan_win_weak_interception.cpp -compiler-rt/lib/xray/xray_AArch64.cpp -compiler-rt/lib/xray/xray_arm.cpp -compiler-rt/lib/xray/xray_basic_flags.cpp -compiler-rt/lib/xray/xray_basic_flags.h -compiler-rt/lib/xray/xray_basic_logging.h -compiler-rt/lib/xray/xray_buffer_queue.cpp -compiler-rt/lib/xray/xray_buffer_queue.h -compiler-rt/lib/xray/xray_fdr_controller.h -compiler-rt/lib/xray/xray_fdr_flags.cpp -compiler-rt/lib/xray/xray_fdr_flags.h -compiler-rt/lib/xray/xray_fdr_logging.h -compiler-rt/lib/xray/xray_fdr_log_records.h -compiler-rt/lib/xray/xray_flags.cpp -compiler-rt/lib/xray/xray_flags.h -compiler-rt/lib/xray/xray_interface_internal.h -compiler-rt/lib/xray/xray_log_interface.cpp -compiler-rt/lib/xray/xray_mips.cpp -compiler-rt/lib/xray/xray_mips64.cpp -compiler-rt/lib/xray/xray_powerpc64.cpp -compiler-rt/lib/xray/xray_profile_collector.cpp -compiler-rt/lib/xray/xray_profile_collector.h -compiler-rt/lib/xray/xray_profiling.cpp -compiler-rt/lib/xray/xray_profiling_flags.cpp -compiler-rt/lib/xray/xray_profiling_flags.h -compiler-rt/lib/xray/xray_recursion_guard.h -compiler-rt/lib/xray/xray_trampoline_powerpc64.cpp -compiler-rt/lib/xray/xray_tsc.h -compiler-rt/lib/xray/tests/unit/allocator_test.cpp -compiler-rt/lib/xray/tests/unit/buffer_queue_test.cpp -compiler-rt/lib/xray/tests/unit/fdr_log_writer_test.cpp -compiler-rt/lib/xray/tests/unit/function_call_trie_test.cpp -compiler-rt/lib/xray/tests/unit/profile_collector_test.cpp -compiler-rt/lib/xray/tests/unit/segmented_array_test.cpp -compiler-rt/lib/xray/tests/unit/test_helpers.h -compiler-rt/lib/xray/tests/unit/xray_unit_test_main.cpp -compiler-rt/tools/gwp_asan/options_parser_fuzzer.cpp -compiler-rt/tools/gwp_asan/stack_trace_compressor_fuzzer.cpp -cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names_noncanonical_type_units.cpp -cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp -cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp -cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp -cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp -cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp -cross-project-tests/debuginfo-tests/dexter-tests/global-constant.cpp -cross-project-tests/debuginfo-tests/dexter-tests/nrvo.cpp -cross-project-tests/debuginfo-tests/dexter-tests/realigned-frame.cpp -cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp -flang/examples/external-hello.cpp -flang/examples/FlangOmpReport/FlangOmpReport.cpp -flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp -flang/examples/FlangOmpReport/FlangOmpReportVisitor.h -flang/examples/PrintFlangFunctionNames/PrintFlangFunctionNames.cpp -flang/include/flang/ISO_Fortran_binding.h -flang/include/flang/Common/bit-population-count.h -flang/include/flang/Common/constexpr-bitset.h -flang/include/flang/Common/default-kinds.h -flang/include/flang/Common/enum-set.h -flang/include/flang/Common/fast-int-set.h -flang/include/flang/Common/format.h -flang/include/flang/Common/Fortran-features.h -flang/include/flang/Common/Fortran.h -flang/include/flang/Common/idioms.h -flang/include/flang/Common/indirection.h -flang/include/flang/Common/interval.h -flang/include/flang/Common/leading-zero-bit-count.h -flang/include/flang/Common/long-double.h -flang/include/flang/Common/real.h -flang/include/flang/Common/reference-counted.h -flang/include/flang/Common/reference.h -flang/include/flang/Common/restorer.h -flang/include/flang/Common/static-multimap-view.h -flang/include/flang/Common/template.h -flang/include/flang/Common/uint128.h -flang/include/flang/Common/unwrap.h -flang/include/flang/Decimal/binary-floating-point.h -flang/include/flang/Decimal/decimal.h -flang/include/flang/Evaluate/call.h -flang/include/flang/Evaluate/characteristics.h -flang/include/flang/Evaluate/check-expression.h -flang/include/flang/Evaluate/common.h -flang/include/flang/Evaluate/complex.h -flang/include/flang/Evaluate/constant.h -flang/include/flang/Evaluate/expression.h -flang/include/flang/Evaluate/fold-designator.h -flang/include/flang/Evaluate/fold.h -flang/include/flang/Evaluate/formatting.h -flang/include/flang/Evaluate/initial-image.h -flang/include/flang/Evaluate/integer.h -flang/include/flang/Evaluate/intrinsics-library.h -flang/include/flang/Evaluate/intrinsics.h -flang/include/flang/Evaluate/logical.h -flang/include/flang/Evaluate/real.h -flang/include/flang/Evaluate/rounding-bits.h -flang/include/flang/Evaluate/shape.h -flang/include/flang/Evaluate/static-data.h -flang/include/flang/Evaluate/tools.h -flang/include/flang/Evaluate/traverse.h -flang/include/flang/Evaluate/type.h -flang/include/flang/Evaluate/variable.h -flang/include/flang/Frontend/CompilerInstance.h -flang/include/flang/Frontend/FrontendAction.h -flang/include/flang/Frontend/FrontendActions.h -flang/include/flang/Frontend/FrontendOptions.h -flang/include/flang/Frontend/FrontendPluginRegistry.h -flang/include/flang/Frontend/PreprocessorOptions.h -flang/include/flang/Frontend/TargetOptions.h -flang/include/flang/Frontend/TextDiagnostic.h -flang/include/flang/Frontend/TextDiagnosticBuffer.h -flang/include/flang/Frontend/TextDiagnosticPrinter.h -flang/include/flang/FrontendTool/Utils.h -flang/include/flang/Lower/AbstractConverter.h -flang/include/flang/Lower/Allocatable.h -flang/include/flang/Lower/BoxAnalyzer.h -flang/include/flang/Lower/Bridge.h -flang/include/flang/Lower/CallInterface.h -flang/include/flang/Lower/Coarray.h -flang/include/flang/Lower/ComponentPath.h -flang/include/flang/Lower/ConvertExpr.h -flang/include/flang/Lower/ConvertType.h -flang/include/flang/Lower/ConvertVariable.h -flang/include/flang/Lower/DumpEvaluateExpr.h -flang/include/flang/Lower/HostAssociations.h -flang/include/flang/Lower/IntervalSet.h -flang/include/flang/Lower/IntrinsicCall.h -flang/include/flang/Lower/IO.h -flang/include/flang/Lower/IterationSpace.h -flang/include/flang/Lower/Mangler.h -flang/include/flang/Lower/OpenACC.h -flang/include/flang/Lower/OpenMP.h -flang/include/flang/Lower/PFTBuilder.h -flang/include/flang/Lower/PFTDefs.h -flang/include/flang/Lower/Runtime.h -flang/include/flang/Lower/StatementContext.h -flang/include/flang/Lower/Todo.h -flang/include/flang/Lower/Support/Utils.h -flang/include/flang/Lower/Support/Verifier.h -flang/include/flang/Optimizer/Builder/BoxValue.h -flang/include/flang/Optimizer/Builder/Character.h -flang/include/flang/Optimizer/Builder/Complex.h -flang/include/flang/Optimizer/Builder/DoLoopHelper.h -flang/include/flang/Optimizer/Builder/Factory.h -flang/include/flang/Optimizer/Builder/FIRBuilder.h -flang/include/flang/Optimizer/Builder/MutableBox.h -flang/include/flang/Optimizer/Builder/Runtime/Assign.h -flang/include/flang/Optimizer/Builder/Runtime/Character.h -flang/include/flang/Optimizer/Builder/Runtime/Command.h -flang/include/flang/Optimizer/Builder/Runtime/Derived.h -flang/include/flang/Optimizer/Builder/Runtime/Numeric.h -flang/include/flang/Optimizer/Builder/Runtime/Ragged.h -flang/include/flang/Optimizer/Builder/Runtime/Reduction.h -flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h -flang/include/flang/Optimizer/Builder/Runtime/Stop.h -flang/include/flang/Optimizer/Builder/Runtime/Transformational.h -flang/include/flang/Optimizer/CodeGen/CodeGen.h -flang/include/flang/Optimizer/Dialect/FIRAttr.h -flang/include/flang/Optimizer/Dialect/FIRDialect.h -flang/include/flang/Optimizer/Dialect/FIROps.h -flang/include/flang/Optimizer/Dialect/FIROpsSupport.h -flang/include/flang/Optimizer/Dialect/FIRType.h -flang/include/flang/Optimizer/Support/FatalError.h -flang/include/flang/Optimizer/Support/FIRContext.h -flang/include/flang/Optimizer/Support/InitFIR.h -flang/include/flang/Optimizer/Support/InternalNames.h -flang/include/flang/Optimizer/Support/KindMapping.h -flang/include/flang/Optimizer/Support/Matcher.h -flang/include/flang/Optimizer/Support/TypeCode.h -flang/include/flang/Optimizer/Support/Utils.h -flang/include/flang/Optimizer/Transforms/Passes.h -flang/include/flang/Parser/char-block.h -flang/include/flang/Parser/char-buffer.h -flang/include/flang/Parser/char-set.h -flang/include/flang/Parser/characters.h -flang/include/flang/Parser/dump-parse-tree.h -flang/include/flang/Parser/format-specification.h -flang/include/flang/Parser/instrumented-parser.h -flang/include/flang/Parser/message.h -flang/include/flang/Parser/parse-state.h -flang/include/flang/Parser/parse-tree-visitor.h -flang/include/flang/Parser/parsing.h -flang/include/flang/Parser/preprocessor.h -flang/include/flang/Parser/provenance.h -flang/include/flang/Parser/source.h -flang/include/flang/Parser/token-sequence.h -flang/include/flang/Parser/tools.h -flang/include/flang/Parser/unparse.h -flang/include/flang/Parser/user-state.h -flang/include/flang/Runtime/allocatable.h -flang/include/flang/Runtime/assign.h -flang/include/flang/Runtime/c-or-cpp.h -flang/include/flang/Runtime/character.h -flang/include/flang/Runtime/command.h -flang/include/flang/Runtime/cpp-type.h -flang/include/flang/Runtime/derived-api.h -flang/include/flang/Runtime/descriptor.h -flang/include/flang/Runtime/entry-names.h -flang/include/flang/Runtime/extensions.h -flang/include/flang/Runtime/inquiry.h -flang/include/flang/Runtime/io-api.h -flang/include/flang/Runtime/iostat.h -flang/include/flang/Runtime/main.h -flang/include/flang/Runtime/matmul.h -flang/include/flang/Runtime/memory.h -flang/include/flang/Runtime/misc-intrinsic.h -flang/include/flang/Runtime/numeric.h -flang/include/flang/Runtime/pointer.h -flang/include/flang/Runtime/ragged.h -flang/include/flang/Runtime/random.h -flang/include/flang/Runtime/reduction.h -flang/include/flang/Runtime/stop.h -flang/include/flang/Runtime/support.h -flang/include/flang/Runtime/time-intrinsic.h -flang/include/flang/Runtime/transformational.h -flang/include/flang/Runtime/type-code.h -flang/include/flang/Semantics/attr.h -flang/include/flang/Semantics/expression.h -flang/include/flang/Semantics/openmp-directive-sets.h -flang/include/flang/Semantics/runtime-type-info.h -flang/include/flang/Semantics/scope.h -flang/include/flang/Semantics/semantics.h -flang/include/flang/Semantics/symbol.h -flang/include/flang/Semantics/tools.h -flang/include/flang/Semantics/type.h -flang/include/flang/Semantics/unparse-with-symbols.h -flang/lib/Common/default-kinds.cpp -flang/lib/Common/Fortran-features.cpp -flang/lib/Common/Fortran.cpp -flang/lib/Common/idioms.cpp -flang/lib/Decimal/big-radix-floating-point.h -flang/lib/Decimal/binary-to-decimal.cpp -flang/lib/Decimal/decimal-to-binary.cpp -flang/lib/Evaluate/call.cpp -flang/lib/Evaluate/character.h -flang/lib/Evaluate/check-expression.cpp -flang/lib/Evaluate/common.cpp -flang/lib/Evaluate/complex.cpp -flang/lib/Evaluate/constant.cpp -flang/lib/Evaluate/expression.cpp -flang/lib/Evaluate/fold-character.cpp -flang/lib/Evaluate/fold-complex.cpp -flang/lib/Evaluate/fold-designator.cpp -flang/lib/Evaluate/fold-implementation.h -flang/lib/Evaluate/fold-logical.cpp -flang/lib/Evaluate/fold-real.cpp -flang/lib/Evaluate/fold-reduction.cpp -flang/lib/Evaluate/fold-reduction.h -flang/lib/Evaluate/fold.cpp -flang/lib/Evaluate/formatting.cpp -flang/lib/Evaluate/host.cpp -flang/lib/Evaluate/host.h -flang/lib/Evaluate/initial-image.cpp -flang/lib/Evaluate/int-power.h -flang/lib/Evaluate/integer.cpp -flang/lib/Evaluate/intrinsics-library.cpp -flang/lib/Evaluate/intrinsics.cpp -flang/lib/Evaluate/logical.cpp -flang/lib/Evaluate/real.cpp -flang/lib/Evaluate/shape.cpp -flang/lib/Evaluate/static-data.cpp -flang/lib/Evaluate/tools.cpp -flang/lib/Evaluate/type.cpp -flang/lib/Evaluate/variable.cpp -flang/lib/Frontend/CompilerInstance.cpp -flang/lib/Frontend/FrontendAction.cpp -flang/lib/Frontend/FrontendOptions.cpp -flang/lib/Frontend/TextDiagnostic.cpp -flang/lib/Frontend/TextDiagnosticBuffer.cpp -flang/lib/Frontend/TextDiagnosticPrinter.cpp -flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp -flang/lib/Lower/Allocatable.cpp -flang/lib/Lower/Bridge.cpp -flang/lib/Lower/CallInterface.cpp -flang/lib/Lower/Coarray.cpp -flang/lib/Lower/ComponentPath.cpp -flang/lib/Lower/ConvertExpr.cpp -flang/lib/Lower/ConvertType.cpp -flang/lib/Lower/ConvertVariable.cpp -flang/lib/Lower/DumpEvaluateExpr.cpp -flang/lib/Lower/IntervalSet.h -flang/lib/Lower/IntrinsicCall.cpp -flang/lib/Lower/IO.cpp -flang/lib/Lower/IterationSpace.cpp -flang/lib/Lower/Mangler.cpp -flang/lib/Lower/OpenACC.cpp -flang/lib/Lower/OpenMP.cpp -flang/lib/Lower/PFTBuilder.cpp -flang/lib/Lower/RTBuilder.h -flang/lib/Lower/Runtime.cpp -flang/lib/Lower/SymbolMap.cpp -flang/lib/Optimizer/Builder/BoxValue.cpp -flang/lib/Optimizer/Builder/Character.cpp -flang/lib/Optimizer/Builder/Complex.cpp -flang/lib/Optimizer/Builder/DoLoopHelper.cpp -flang/lib/Optimizer/Builder/FIRBuilder.cpp -flang/lib/Optimizer/Builder/MutableBox.cpp -flang/lib/Optimizer/Builder/Runtime/Assign.cpp -flang/lib/Optimizer/Builder/Runtime/Character.cpp -flang/lib/Optimizer/Builder/Runtime/Command.cpp -flang/lib/Optimizer/Builder/Runtime/Derived.cpp -flang/lib/Optimizer/Builder/Runtime/Numeric.cpp -flang/lib/Optimizer/Builder/Runtime/Ragged.cpp -flang/lib/Optimizer/Builder/Runtime/Reduction.cpp -flang/lib/Optimizer/Builder/Runtime/Stop.cpp -flang/lib/Optimizer/Builder/Runtime/Transformational.cpp -flang/lib/Optimizer/CodeGen/CGOps.cpp -flang/lib/Optimizer/CodeGen/CGOps.h -flang/lib/Optimizer/CodeGen/CodeGen.cpp -flang/lib/Optimizer/CodeGen/DescriptorModel.h -flang/lib/Optimizer/CodeGen/PassDetail.h -flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp -flang/lib/Optimizer/CodeGen/Target.cpp -flang/lib/Optimizer/CodeGen/Target.h -flang/lib/Optimizer/CodeGen/TargetRewrite.cpp -flang/lib/Optimizer/CodeGen/TypeConverter.h -flang/lib/Optimizer/Dialect/FIRAttr.cpp -flang/lib/Optimizer/Dialect/FIRDialect.cpp -flang/lib/Optimizer/Dialect/FIROps.cpp -flang/lib/Optimizer/Dialect/FIRType.cpp -flang/lib/Optimizer/Dialect/Inliner.cpp -flang/lib/Optimizer/Support/FIRContext.cpp -flang/lib/Optimizer/Support/InitFIR.cpp -flang/lib/Optimizer/Support/InternalNames.cpp -flang/lib/Optimizer/Support/KindMapping.cpp -flang/lib/Optimizer/Transforms/AbstractResult.cpp -flang/lib/Optimizer/Transforms/AffineDemotion.cpp -flang/lib/Optimizer/Transforms/AffinePromotion.cpp -flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp -flang/lib/Optimizer/Transforms/CharacterConversion.cpp -flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp -flang/lib/Optimizer/Transforms/MemoryAllocation.cpp -flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp -flang/lib/Optimizer/Transforms/PassDetail.h -flang/lib/Optimizer/Transforms/RewriteLoop.cpp -flang/lib/Optimizer/Transforms/StackArrays.cpp -flang/lib/Parser/basic-parsers.h -flang/lib/Parser/char-block.cpp -flang/lib/Parser/char-buffer.cpp -flang/lib/Parser/char-set.cpp -flang/lib/Parser/characters.cpp -flang/lib/Parser/debug-parser.cpp -flang/lib/Parser/debug-parser.h -flang/lib/Parser/executable-parsers.cpp -flang/lib/Parser/expr-parsers.cpp -flang/lib/Parser/expr-parsers.h -flang/lib/Parser/Fortran-parsers.cpp -flang/lib/Parser/instrumented-parser.cpp -flang/lib/Parser/io-parsers.cpp -flang/lib/Parser/message.cpp -flang/lib/Parser/misc-parsers.h -flang/lib/Parser/openacc-parsers.cpp -flang/lib/Parser/openmp-parsers.cpp -flang/lib/Parser/parse-tree.cpp -flang/lib/Parser/parsing.cpp -flang/lib/Parser/preprocessor.cpp -flang/lib/Parser/prescan.cpp -flang/lib/Parser/prescan.h -flang/lib/Parser/program-parsers.cpp -flang/lib/Parser/provenance.cpp -flang/lib/Parser/source.cpp -flang/lib/Parser/stmt-parser.h -flang/lib/Parser/token-parsers.h -flang/lib/Parser/token-sequence.cpp -flang/lib/Parser/tools.cpp -flang/lib/Parser/type-parser-implementation.h -flang/lib/Parser/type-parsers.h -flang/lib/Parser/unparse.cpp -flang/lib/Parser/user-state.cpp -flang/lib/Semantics/assignment.cpp -flang/lib/Semantics/assignment.h -flang/lib/Semantics/attr.cpp -flang/lib/Semantics/canonicalize-acc.cpp -flang/lib/Semantics/canonicalize-acc.h -flang/lib/Semantics/canonicalize-do.cpp -flang/lib/Semantics/canonicalize-do.h -flang/lib/Semantics/canonicalize-omp.cpp -flang/lib/Semantics/canonicalize-omp.h -flang/lib/Semantics/check-acc-structure.cpp -flang/lib/Semantics/check-allocate.cpp -flang/lib/Semantics/check-allocate.h -flang/lib/Semantics/check-arithmeticif.cpp -flang/lib/Semantics/check-arithmeticif.h -flang/lib/Semantics/check-call.h -flang/lib/Semantics/check-case.cpp -flang/lib/Semantics/check-case.h -flang/lib/Semantics/check-coarray.cpp -flang/lib/Semantics/check-coarray.h -flang/lib/Semantics/check-data.cpp -flang/lib/Semantics/check-data.h -flang/lib/Semantics/check-deallocate.cpp -flang/lib/Semantics/check-deallocate.h -flang/lib/Semantics/check-declarations.h -flang/lib/Semantics/check-directive-structure.h -flang/lib/Semantics/check-do-forall.cpp -flang/lib/Semantics/check-do-forall.h -flang/lib/Semantics/check-if-stmt.cpp -flang/lib/Semantics/check-if-stmt.h -flang/lib/Semantics/check-io.cpp -flang/lib/Semantics/check-io.h -flang/lib/Semantics/check-namelist.cpp -flang/lib/Semantics/check-namelist.h -flang/lib/Semantics/check-nullify.cpp -flang/lib/Semantics/check-nullify.h -flang/lib/Semantics/check-omp-structure.cpp -flang/lib/Semantics/check-omp-structure.h -flang/lib/Semantics/check-purity.cpp -flang/lib/Semantics/check-purity.h -flang/lib/Semantics/check-return.cpp -flang/lib/Semantics/check-return.h -flang/lib/Semantics/check-select-rank.cpp -flang/lib/Semantics/check-select-rank.h -flang/lib/Semantics/check-select-type.cpp -flang/lib/Semantics/check-select-type.h -flang/lib/Semantics/check-stop.cpp -flang/lib/Semantics/check-stop.h -flang/lib/Semantics/compute-offsets.cpp -flang/lib/Semantics/compute-offsets.h -flang/lib/Semantics/data-to-inits.cpp -flang/lib/Semantics/mod-file.h -flang/lib/Semantics/pointer-assignment.cpp -flang/lib/Semantics/pointer-assignment.h -flang/lib/Semantics/program-tree.cpp -flang/lib/Semantics/program-tree.h -flang/lib/Semantics/resolve-directives.cpp -flang/lib/Semantics/resolve-directives.h -flang/lib/Semantics/resolve-labels.cpp -flang/lib/Semantics/resolve-labels.h -flang/lib/Semantics/resolve-names-utils.cpp -flang/lib/Semantics/resolve-names-utils.h -flang/lib/Semantics/resolve-names.h -flang/lib/Semantics/rewrite-parse-tree.cpp -flang/lib/Semantics/rewrite-parse-tree.h -flang/lib/Semantics/runtime-type-info.cpp -flang/lib/Semantics/scope.cpp -flang/lib/Semantics/semantics.cpp -flang/lib/Semantics/tools.cpp -flang/lib/Semantics/unparse-with-symbols.cpp -flang/module/omp_lib.h -flang/runtime/allocatable.cpp -flang/runtime/assign.cpp -flang/runtime/buffer.cpp -flang/runtime/buffer.h -flang/runtime/character.cpp -flang/runtime/command.cpp -flang/runtime/complex-reduction.h -flang/runtime/connection.cpp -flang/runtime/connection.h -flang/runtime/copy.cpp -flang/runtime/copy.h -flang/runtime/derived-api.cpp -flang/runtime/derived.h -flang/runtime/descriptor-io.cpp -flang/runtime/descriptor-io.h -flang/runtime/descriptor.cpp -flang/runtime/dot-product.cpp -flang/runtime/edit-input.cpp -flang/runtime/edit-input.h -flang/runtime/edit-output.cpp -flang/runtime/edit-output.h -flang/runtime/environment.cpp -flang/runtime/environment.h -flang/runtime/extensions.cpp -flang/runtime/extrema.cpp -flang/runtime/file.cpp -flang/runtime/file.h -flang/runtime/findloc.cpp -flang/runtime/format-implementation.h -flang/runtime/format.cpp -flang/runtime/format.h -flang/runtime/inquiry.cpp -flang/runtime/internal-unit.cpp -flang/runtime/internal-unit.h -flang/runtime/io-api.cpp -flang/runtime/io-error.cpp -flang/runtime/io-error.h -flang/runtime/io-stmt.cpp -flang/runtime/io-stmt.h -flang/runtime/iostat.cpp -flang/runtime/ISO_Fortran_binding.cpp -flang/runtime/lock.h -flang/runtime/main.cpp -flang/runtime/matmul.cpp -flang/runtime/memory.cpp -flang/runtime/misc-intrinsic.cpp -flang/runtime/namelist.cpp -flang/runtime/namelist.h -flang/runtime/numeric.cpp -flang/runtime/pointer.cpp -flang/runtime/product.cpp -flang/runtime/ragged.cpp -flang/runtime/random.cpp -flang/runtime/reduction-templates.h -flang/runtime/reduction.cpp -flang/runtime/stat.cpp -flang/runtime/stat.h -flang/runtime/stop.cpp -flang/runtime/sum.cpp -flang/runtime/support.cpp -flang/runtime/terminator.cpp -flang/runtime/terminator.h -flang/runtime/time-intrinsic.cpp -flang/runtime/tools.cpp -flang/runtime/tools.h -flang/runtime/transformational.cpp -flang/runtime/type-code.cpp -flang/runtime/type-info.cpp -flang/runtime/type-info.h -flang/runtime/unit-map.cpp -flang/runtime/unit-map.h -flang/runtime/unit.h -flang/tools/bbc/bbc.cpp -flang/tools/f18/dump.cpp -flang/tools/f18-parse-demo/f18-parse-demo.cpp -flang/tools/f18-parse-demo/stub-evaluate.cpp -flang/tools/fir-opt/fir-opt.cpp -flang/tools/flang-driver/driver.cpp -flang/tools/flang-driver/fc1_main.cpp -flang/tools/tco/tco.cpp -flang/unittests/Common/FastIntSetTest.cpp -flang/unittests/Decimal/quick-sanity-test.cpp -flang/unittests/Decimal/thorough-test.cpp -flang/unittests/Evaluate/bit-population-count.cpp -flang/unittests/Evaluate/expression.cpp -flang/unittests/Evaluate/folding.cpp -flang/unittests/Evaluate/fp-testing.cpp -flang/unittests/Evaluate/fp-testing.h -flang/unittests/Evaluate/integer.cpp -flang/unittests/Evaluate/intrinsics.cpp -flang/unittests/Evaluate/ISO-Fortran-binding.cpp -flang/unittests/Evaluate/leading-zero-bit-count.cpp -flang/unittests/Evaluate/logical.cpp -flang/unittests/Evaluate/real.cpp -flang/unittests/Evaluate/reshape.cpp -flang/unittests/Evaluate/testing.cpp -flang/unittests/Evaluate/testing.h -flang/unittests/Evaluate/uint128.cpp -flang/unittests/Frontend/CompilerInstanceTest.cpp -flang/unittests/Frontend/FrontendActionTest.cpp -flang/unittests/Optimizer/InternalNamesTest.cpp -flang/unittests/Optimizer/KindMappingTest.cpp -flang/unittests/Optimizer/RTBuilder.cpp -flang/unittests/Optimizer/Builder/CharacterTest.cpp -flang/unittests/Optimizer/Builder/ComplexTest.cpp -flang/unittests/Optimizer/Builder/DoLoopHelperTest.cpp -flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp -flang/unittests/Optimizer/Builder/Runtime/AssignTest.cpp -flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp -flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp -flang/unittests/Optimizer/Builder/Runtime/DerivedTest.cpp -flang/unittests/Optimizer/Builder/Runtime/NumericTest.cpp -flang/unittests/Optimizer/Builder/Runtime/RaggedTest.cpp -flang/unittests/Optimizer/Builder/Runtime/ReductionTest.cpp -flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h -flang/unittests/Optimizer/Builder/Runtime/StopTest.cpp -flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp -flang/unittests/Runtime/BufferTest.cpp -flang/unittests/Runtime/CharacterTest.cpp -flang/unittests/Runtime/CommandTest.cpp -flang/unittests/Runtime/CrashHandlerFixture.cpp -flang/unittests/Runtime/CrashHandlerFixture.h -flang/unittests/Runtime/ExternalIOTest.cpp -flang/unittests/Runtime/Format.cpp -flang/unittests/Runtime/Inquiry.cpp -flang/unittests/Runtime/ListInputTest.cpp -flang/unittests/Runtime/Matmul.cpp -flang/unittests/Runtime/MiscIntrinsic.cpp -flang/unittests/Runtime/Namelist.cpp -flang/unittests/Runtime/Numeric.cpp -flang/unittests/Runtime/NumericalFormatTest.cpp -flang/unittests/Runtime/Ragged.cpp -flang/unittests/Runtime/Random.cpp -flang/unittests/Runtime/Reduction.cpp -flang/unittests/Runtime/RuntimeCrashTest.cpp -flang/unittests/Runtime/Stop.cpp -flang/unittests/Runtime/Time.cpp -flang/unittests/Runtime/tools.h -flang/unittests/Runtime/Transformational.cpp -libc/AOR_v20.02/math/v_exp.h -libc/benchmarks/JSON.cpp -libc/benchmarks/JSON.h -libc/benchmarks/LibcBenchmark.cpp -libc/benchmarks/LibcBenchmark.h -libc/benchmarks/LibcBenchmarkTest.cpp -libc/benchmarks/LibcDefaultImplementations.cpp -libc/benchmarks/LibcFunctionPrototypes.h -libc/benchmarks/LibcMemoryBenchmark.cpp -libc/benchmarks/LibcMemoryBenchmark.h -libc/benchmarks/LibcMemoryBenchmarkMain.cpp -libc/benchmarks/LibcMemoryBenchmarkTest.cpp -libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp -libc/benchmarks/MemorySizeDistributions.cpp -libc/benchmarks/MemorySizeDistributions.h -libc/benchmarks/automemcpy/include/automemcpy/CodeGen.h -libc/benchmarks/automemcpy/include/automemcpy/FunctionDescriptor.h -libc/benchmarks/automemcpy/include/automemcpy/RandomFunctionGenerator.h -libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h -libc/benchmarks/automemcpy/lib/CodeGen.cpp -libc/benchmarks/automemcpy/lib/CodeGenMain.cpp -libc/benchmarks/automemcpy/lib/RandomFunctionGenerator.cpp -libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp -libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp -libc/benchmarks/automemcpy/unittests/CodeGenTest.cpp -libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp -libc/config/linux/app.h -libc/fuzzing/math/Compare.h -libc/fuzzing/math/math_differential_fuzz.cpp -libc/fuzzing/math/nextafter_differential_fuzz.cpp -libc/fuzzing/math/RemQuoDiff.h -libc/fuzzing/math/SingleInputSingleOutputDiff.h -libc/fuzzing/math/TwoInputSingleOutputDiff.h -libc/fuzzing/stdlib/atof_differential_fuzz.cpp -libc/fuzzing/stdlib/qsort_fuzz.cpp -libc/fuzzing/stdlib/StringParserOutputDiff.h -libc/fuzzing/string/strcmp_fuzz.cpp -libc/fuzzing/string/strstr_fuzz.cpp -libc/include/__llvm-libc-common.h -libc/include/llvm-libc-macros/fcntl-macros.h -libc/include/llvm-libc-macros/stdio-macros.h -libc/include/llvm-libc-macros/linux/fcntl-macros.h -libc/include/llvm-libc-types/cnd_t.h -libc/include/llvm-libc-types/div_t.h -libc/include/llvm-libc-types/double_t.h -libc/include/llvm-libc-types/fenv_t.h -libc/include/llvm-libc-types/fexcept_t.h -libc/include/llvm-libc-types/FILE.h -libc/include/llvm-libc-types/float_t.h -libc/include/llvm-libc-types/imaxdiv_t.h -libc/include/llvm-libc-types/jmp_buf.h -libc/include/llvm-libc-types/ldiv_t.h -libc/include/llvm-libc-types/lldiv_t.h -libc/include/llvm-libc-types/mode_t.h -libc/include/llvm-libc-types/mtx_t.h -libc/include/llvm-libc-types/off_t.h -libc/include/llvm-libc-types/once_flag.h -libc/include/llvm-libc-types/size_t.h -libc/include/llvm-libc-types/ssize_t.h -libc/include/llvm-libc-types/struct_sigaction.h -libc/include/llvm-libc-types/struct_tm.h -libc/include/llvm-libc-types/thrd_start_t.h -libc/include/llvm-libc-types/thrd_t.h -libc/include/llvm-libc-types/time_t.h -libc/include/llvm-libc-types/__atexithandler_t.h -libc/include/llvm-libc-types/__bsearchcompare_t.h -libc/include/llvm-libc-types/__call_once_func_t.h -libc/include/llvm-libc-types/__futex_word.h -libc/include/llvm-libc-types/__mutex_type.h -libc/include/llvm-libc-types/__qsortcompare_t.h -libc/include/llvm-libc-types/__sighandler_t.h -libc/loader/linux/aarch64/start.cpp -libc/loader/linux/x86_64/start.cpp -libc/src/assert/__assert_fail.h -libc/src/ctype/isalnum.cpp -libc/src/ctype/isalnum.h -libc/src/ctype/isalpha.cpp -libc/src/ctype/isalpha.h -libc/src/ctype/isascii.cpp -libc/src/ctype/isascii.h -libc/src/ctype/isblank.cpp -libc/src/ctype/isblank.h -libc/src/ctype/iscntrl.cpp -libc/src/ctype/iscntrl.h -libc/src/ctype/isdigit.cpp -libc/src/ctype/isdigit.h -libc/src/ctype/isgraph.cpp -libc/src/ctype/isgraph.h -libc/src/ctype/islower.cpp -libc/src/ctype/islower.h -libc/src/ctype/isprint.cpp -libc/src/ctype/isprint.h -libc/src/ctype/ispunct.cpp -libc/src/ctype/ispunct.h -libc/src/ctype/isspace.cpp -libc/src/ctype/isspace.h -libc/src/ctype/isupper.cpp -libc/src/ctype/isupper.h -libc/src/ctype/isxdigit.cpp -libc/src/ctype/isxdigit.h -libc/src/ctype/toascii.cpp -libc/src/ctype/toascii.h -libc/src/ctype/tolower.cpp -libc/src/ctype/tolower.h -libc/src/ctype/toupper.cpp -libc/src/ctype/toupper.h -libc/src/errno/dummy_errno.cpp -libc/src/errno/dummy_errno.h -libc/src/errno/errno.cpp -libc/src/errno/llvmlibc_errno.h -libc/src/fcntl/creat.h -libc/src/fcntl/open.h -libc/src/fcntl/openat.h -libc/src/fcntl/linux/creat.cpp -libc/src/fcntl/linux/open.cpp -libc/src/fcntl/linux/openat.cpp -libc/src/fenv/feclearexcept.cpp -libc/src/fenv/feclearexcept.h -libc/src/fenv/fedisableexcept.cpp -libc/src/fenv/fedisableexcept.h -libc/src/fenv/feenableexcept.cpp -libc/src/fenv/feenableexcept.h -libc/src/fenv/fegetenv.cpp -libc/src/fenv/fegetenv.h -libc/src/fenv/fegetexcept.cpp -libc/src/fenv/fegetexcept.h -libc/src/fenv/fegetexceptflag.cpp -libc/src/fenv/fegetexceptflag.h -libc/src/fenv/fegetround.cpp -libc/src/fenv/fegetround.h -libc/src/fenv/feholdexcept.cpp -libc/src/fenv/feholdexcept.h -libc/src/fenv/feraiseexcept.cpp -libc/src/fenv/feraiseexcept.h -libc/src/fenv/fesetenv.cpp -libc/src/fenv/fesetenv.h -libc/src/fenv/fesetexceptflag.cpp -libc/src/fenv/fesetexceptflag.h -libc/src/fenv/fesetround.cpp -libc/src/fenv/fesetround.h -libc/src/fenv/fetestexcept.cpp -libc/src/fenv/fetestexcept.h -libc/src/fenv/feupdateenv.cpp -libc/src/fenv/feupdateenv.h -libc/src/inttypes/imaxdiv.cpp -libc/src/inttypes/imaxdiv.h -libc/src/inttypes/strtoimax.cpp -libc/src/inttypes/strtoimax.h -libc/src/inttypes/strtoumax.cpp -libc/src/inttypes/strtoumax.h -libc/src/math/ceil.h -libc/src/math/ceilf.h -libc/src/math/ceill.h -libc/src/math/copysign.h -libc/src/math/copysignf.h -libc/src/math/copysignl.h -libc/src/math/cos.h -libc/src/math/cosf.h -libc/src/math/exp2f.h -libc/src/math/expf.h -libc/src/math/expm1f.h -libc/src/math/fabs.h -libc/src/math/fabsf.h -libc/src/math/fabsl.h -libc/src/math/fdim.h -libc/src/math/fdimf.h -libc/src/math/fdiml.h -libc/src/math/floor.h -libc/src/math/floorf.h -libc/src/math/floorl.h -libc/src/math/fma.cpp -libc/src/math/fma.h -libc/src/math/fmaf.cpp -libc/src/math/fmaf.h -libc/src/math/fmax.h -libc/src/math/fmaxf.h -libc/src/math/fmaxl.h -libc/src/math/fmin.h -libc/src/math/fminf.h -libc/src/math/fminl.h -libc/src/math/frexp.h -libc/src/math/frexpf.h -libc/src/math/frexpl.h -libc/src/math/hypot.h -libc/src/math/hypotf.h -libc/src/math/ilogb.h -libc/src/math/ilogbf.h -libc/src/math/ilogbl.h -libc/src/math/ldexp.h -libc/src/math/ldexpf.h -libc/src/math/ldexpl.h -libc/src/math/llrint.h -libc/src/math/llrintf.h -libc/src/math/llrintl.h -libc/src/math/llround.h -libc/src/math/llroundf.h -libc/src/math/llroundl.h -libc/src/math/log10f.h -libc/src/math/log1pf.h -libc/src/math/log2f.h -libc/src/math/logb.h -libc/src/math/logbf.h -libc/src/math/logbl.h -libc/src/math/logf.h -libc/src/math/lrint.h -libc/src/math/lrintf.h -libc/src/math/lrintl.h -libc/src/math/lround.h -libc/src/math/lroundf.h -libc/src/math/lroundl.h -libc/src/math/modf.h -libc/src/math/modff.h -libc/src/math/modfl.h -libc/src/math/nearbyint.h -libc/src/math/nearbyintf.h -libc/src/math/nearbyintl.h -libc/src/math/nextafter.h -libc/src/math/nextafterf.h -libc/src/math/nextafterl.h -libc/src/math/remainder.h -libc/src/math/remainderf.h -libc/src/math/remainderl.h -libc/src/math/remquo.h -libc/src/math/remquof.h -libc/src/math/remquol.h -libc/src/math/rint.h -libc/src/math/rintf.h -libc/src/math/rintl.h -libc/src/math/round.h -libc/src/math/roundf.h -libc/src/math/roundl.h -libc/src/math/sin.h -libc/src/math/sincosf.h -libc/src/math/sinf.h -libc/src/math/sqrt.h -libc/src/math/sqrtf.h -libc/src/math/sqrtl.h -libc/src/math/tan.h -libc/src/math/trunc.h -libc/src/math/truncf.h -libc/src/math/truncl.h -libc/src/math/aarch64/ceil.cpp -libc/src/math/aarch64/ceilf.cpp -libc/src/math/aarch64/floor.cpp -libc/src/math/aarch64/floorf.cpp -libc/src/math/aarch64/round.cpp -libc/src/math/aarch64/roundf.cpp -libc/src/math/aarch64/sqrt.cpp -libc/src/math/aarch64/sqrtf.cpp -libc/src/math/aarch64/trunc.cpp -libc/src/math/aarch64/truncf.cpp -libc/src/math/generic/ceil.cpp -libc/src/math/generic/ceilf.cpp -libc/src/math/generic/ceill.cpp -libc/src/math/generic/common_constants.cpp -libc/src/math/generic/common_constants.h -libc/src/math/generic/copysign.cpp -libc/src/math/generic/copysignf.cpp -libc/src/math/generic/copysignl.cpp -libc/src/math/generic/cosf.cpp -libc/src/math/generic/dp_trig.cpp -libc/src/math/generic/dp_trig.h -libc/src/math/generic/exp2f.cpp -libc/src/math/generic/expf.cpp -libc/src/math/generic/expm1f.cpp -libc/src/math/generic/exp_utils.cpp -libc/src/math/generic/exp_utils.h -libc/src/math/generic/fabs.cpp -libc/src/math/generic/fabsf.cpp -libc/src/math/generic/fabsl.cpp -libc/src/math/generic/fdim.cpp -libc/src/math/generic/fdimf.cpp -libc/src/math/generic/fdiml.cpp -libc/src/math/generic/floor.cpp -libc/src/math/generic/floorf.cpp -libc/src/math/generic/floorl.cpp -libc/src/math/generic/fmax.cpp -libc/src/math/generic/fmaxf.cpp -libc/src/math/generic/fmaxl.cpp -libc/src/math/generic/fmin.cpp -libc/src/math/generic/fminf.cpp -libc/src/math/generic/fminl.cpp -libc/src/math/generic/frexp.cpp -libc/src/math/generic/frexpf.cpp -libc/src/math/generic/frexpl.cpp -libc/src/math/generic/hypot.cpp -libc/src/math/generic/hypotf.cpp -libc/src/math/generic/ilogb.cpp -libc/src/math/generic/ilogbf.cpp -libc/src/math/generic/ilogbl.cpp -libc/src/math/generic/ldexp.cpp -libc/src/math/generic/ldexpf.cpp -libc/src/math/generic/ldexpl.cpp -libc/src/math/generic/llrint.cpp -libc/src/math/generic/llrintf.cpp -libc/src/math/generic/llrintl.cpp -libc/src/math/generic/llround.cpp -libc/src/math/generic/llroundf.cpp -libc/src/math/generic/llroundl.cpp -libc/src/math/generic/log10f.cpp -libc/src/math/generic/log1pf.cpp -libc/src/math/generic/log2f.cpp -libc/src/math/generic/logb.cpp -libc/src/math/generic/logbf.cpp -libc/src/math/generic/logbl.cpp -libc/src/math/generic/logf.cpp -libc/src/math/generic/lrint.cpp -libc/src/math/generic/lrintf.cpp -libc/src/math/generic/lrintl.cpp -libc/src/math/generic/lround.cpp -libc/src/math/generic/lroundf.cpp -libc/src/math/generic/lroundl.cpp -libc/src/math/generic/math_utils.cpp -libc/src/math/generic/math_utils.h -libc/src/math/generic/modf.cpp -libc/src/math/generic/modff.cpp -libc/src/math/generic/modfl.cpp -libc/src/math/generic/nearbyint.cpp -libc/src/math/generic/nearbyintf.cpp -libc/src/math/generic/nearbyintl.cpp -libc/src/math/generic/nextafter.cpp -libc/src/math/generic/nextafterf.cpp -libc/src/math/generic/nextafterl.cpp -libc/src/math/generic/remainder.cpp -libc/src/math/generic/remainderf.cpp -libc/src/math/generic/remainderl.cpp -libc/src/math/generic/remquo.cpp -libc/src/math/generic/remquof.cpp -libc/src/math/generic/remquol.cpp -libc/src/math/generic/rint.cpp -libc/src/math/generic/rintf.cpp -libc/src/math/generic/rintl.cpp -libc/src/math/generic/round.cpp -libc/src/math/generic/roundf.cpp -libc/src/math/generic/roundl.cpp -libc/src/math/generic/sincosf.cpp -libc/src/math/generic/sincosf_data.cpp -libc/src/math/generic/sincosf_utils.h -libc/src/math/generic/sinf.cpp -libc/src/math/generic/sqrt.cpp -libc/src/math/generic/sqrtf.cpp -libc/src/math/generic/sqrtl.cpp -libc/src/math/generic/trunc.cpp -libc/src/math/generic/truncf.cpp -libc/src/math/generic/truncl.cpp -libc/src/math/x86_64/cos.cpp -libc/src/math/x86_64/sin.cpp -libc/src/math/x86_64/tan.cpp -libc/src/signal/raise.h -libc/src/signal/sigaction.h -libc/src/signal/sigaddset.h -libc/src/signal/sigdelset.h -libc/src/signal/sigemptyset.h -libc/src/signal/sigfillset.h -libc/src/signal/signal.h -libc/src/signal/sigprocmask.h -libc/src/signal/linux/raise.cpp -libc/src/signal/linux/sigaction.cpp -libc/src/signal/linux/sigaddset.cpp -libc/src/signal/linux/sigdelset.cpp -libc/src/signal/linux/sigemptyset.cpp -libc/src/signal/linux/sigfillset.cpp -libc/src/signal/linux/signal.cpp -libc/src/signal/linux/signal.h -libc/src/signal/linux/sigprocmask.cpp -libc/src/signal/linux/__restore.cpp -libc/src/stdio/FILE.h -libc/src/stdio/fwrite.cpp -libc/src/stdio/fwrite.h -libc/src/stdlib/abort.h -libc/src/stdlib/abs.cpp -libc/src/stdlib/abs.h -libc/src/stdlib/atexit.cpp -libc/src/stdlib/atexit.h -libc/src/stdlib/atof.cpp -libc/src/stdlib/atof.h -libc/src/stdlib/atoi.cpp -libc/src/stdlib/atoi.h -libc/src/stdlib/atol.cpp -libc/src/stdlib/atol.h -libc/src/stdlib/atoll.cpp -libc/src/stdlib/atoll.h -libc/src/stdlib/bsearch.cpp -libc/src/stdlib/bsearch.h -libc/src/stdlib/div.cpp -libc/src/stdlib/div.h -libc/src/stdlib/exit.cpp -libc/src/stdlib/exit.h -libc/src/stdlib/getenv.cpp -libc/src/stdlib/getenv.h -libc/src/stdlib/labs.cpp -libc/src/stdlib/labs.h -libc/src/stdlib/ldiv.cpp -libc/src/stdlib/ldiv.h -libc/src/stdlib/llabs.cpp -libc/src/stdlib/llabs.h -libc/src/stdlib/lldiv.cpp -libc/src/stdlib/lldiv.h -libc/src/stdlib/qsort.cpp -libc/src/stdlib/qsort.h -libc/src/stdlib/strtod.cpp -libc/src/stdlib/strtod.h -libc/src/stdlib/strtof.cpp -libc/src/stdlib/strtof.h -libc/src/stdlib/strtol.cpp -libc/src/stdlib/strtol.h -libc/src/stdlib/strtold.cpp -libc/src/stdlib/strtold.h -libc/src/stdlib/strtoll.cpp -libc/src/stdlib/strtoll.h -libc/src/stdlib/strtoul.cpp -libc/src/stdlib/strtoul.h -libc/src/stdlib/strtoull.cpp -libc/src/stdlib/strtoull.h -libc/src/stdlib/_Exit.h -libc/src/stdlib/linux/abort.cpp -libc/src/stdlib/linux/_Exit.cpp -libc/src/string/bcmp.cpp -libc/src/string/bcmp.h -libc/src/string/bzero.cpp -libc/src/string/bzero.h -libc/src/string/memccpy.cpp -libc/src/string/memccpy.h -libc/src/string/memchr.cpp -libc/src/string/memchr.h -libc/src/string/memcmp.cpp -libc/src/string/memcmp.h -libc/src/string/memcpy.cpp -libc/src/string/memcpy.h -libc/src/string/memmove.cpp -libc/src/string/memmove.h -libc/src/string/mempcpy.cpp -libc/src/string/mempcpy.h -libc/src/string/memrchr.cpp -libc/src/string/memrchr.h -libc/src/string/memset.cpp -libc/src/string/memset.h -libc/src/string/stpcpy.cpp -libc/src/string/stpcpy.h -libc/src/string/stpncpy.cpp -libc/src/string/stpncpy.h -libc/src/string/strcat.cpp -libc/src/string/strcat.h -libc/src/string/strchr.cpp -libc/src/string/strchr.h -libc/src/string/strcmp.cpp -libc/src/string/strcmp.h -libc/src/string/strcpy.cpp -libc/src/string/strcpy.h -libc/src/string/strcspn.cpp -libc/src/string/strcspn.h -libc/src/string/strdup.cpp -libc/src/string/strdup.h -libc/src/string/string_utils.h -libc/src/string/strlen.cpp -libc/src/string/strlen.h -libc/src/string/strncat.cpp -libc/src/string/strncat.h -libc/src/string/strncmp.cpp -libc/src/string/strncmp.h -libc/src/string/strncpy.cpp -libc/src/string/strncpy.h -libc/src/string/strndup.cpp -libc/src/string/strndup.h -libc/src/string/strnlen.cpp -libc/src/string/strnlen.h -libc/src/string/strpbrk.cpp -libc/src/string/strpbrk.h -libc/src/string/strrchr.cpp -libc/src/string/strrchr.h -libc/src/string/strspn.cpp -libc/src/string/strspn.h -libc/src/string/strstr.cpp -libc/src/string/strstr.h -libc/src/string/strtok.cpp -libc/src/string/strtok.h -libc/src/string/strtok_r.cpp -libc/src/string/strtok_r.h -libc/src/string/memory_utils/bcmp_implementations.h -libc/src/string/memory_utils/elements_aarch64.h -libc/src/string/memory_utils/elements_x86.h -libc/src/string/memory_utils/memcmp_implementations.h -libc/src/string/memory_utils/memcpy_implementations.h -libc/src/string/memory_utils/memset_implementations.h -libc/src/string/memory_utils/utils.h -libc/src/sys/mman/mmap.h -libc/src/sys/mman/munmap.h -libc/src/sys/mman/linux/mmap.cpp -libc/src/sys/stat/mkdir.h -libc/src/sys/stat/mkdirat.h -libc/src/sys/stat/linux/mkdir.cpp -libc/src/sys/stat/linux/mkdirat.cpp -libc/src/threads/call_once.h -libc/src/threads/cnd_broadcast.h -libc/src/threads/cnd_destroy.h -libc/src/threads/cnd_init.h -libc/src/threads/cnd_signal.h -libc/src/threads/cnd_wait.h -libc/src/threads/mtx_destroy.cpp -libc/src/threads/mtx_destroy.h -libc/src/threads/mtx_init.cpp -libc/src/threads/mtx_init.h -libc/src/threads/mtx_lock.cpp -libc/src/threads/mtx_lock.h -libc/src/threads/mtx_unlock.cpp -libc/src/threads/mtx_unlock.h -libc/src/threads/thrd_create.h -libc/src/threads/thrd_join.h -libc/src/threads/linux/call_once.cpp -libc/src/threads/linux/CndVar.h -libc/src/threads/linux/cnd_wait.cpp -libc/src/threads/linux/Futex.h -libc/src/threads/linux/thrd_create.cpp -libc/src/threads/linux/thrd_join.cpp -libc/src/threads/linux/Thread.h -libc/src/time/asctime.cpp -libc/src/time/asctime.h -libc/src/time/asctime_r.cpp -libc/src/time/asctime_r.h -libc/src/time/gmtime.cpp -libc/src/time/gmtime.h -libc/src/time/gmtime_r.cpp -libc/src/time/gmtime_r.h -libc/src/time/mktime.cpp -libc/src/time/mktime.h -libc/src/time/time_utils.cpp -libc/src/time/time_utils.h -libc/src/unistd/close.h -libc/src/unistd/fsync.h -libc/src/unistd/read.h -libc/src/unistd/rmdir.h -libc/src/unistd/unlink.h -libc/src/unistd/unlinkat.h -libc/src/unistd/write.h -libc/src/unistd/linux/close.cpp -libc/src/unistd/linux/fsync.cpp -libc/src/unistd/linux/read.cpp -libc/src/unistd/linux/rmdir.cpp -libc/src/unistd/linux/unlink.cpp -libc/src/unistd/linux/unlinkat.cpp -libc/src/unistd/linux/write.cpp -libc/src/__support/architectures.h -libc/src/__support/common.h -libc/src/__support/ctype_utils.h -libc/src/__support/detailed_powers_of_ten.h -libc/src/__support/endian.h -libc/src/__support/high_precision_decimal.h -libc/src/__support/integer_operations.h -libc/src/__support/sanitizer.h -libc/src/__support/str_to_float.h -libc/src/__support/str_to_integer.h -libc/src/__support/CPP/Array.h -libc/src/__support/CPP/ArrayRef.h -libc/src/__support/CPP/atomic.h -libc/src/__support/CPP/Bit.h -libc/src/__support/CPP/Bitset.h -libc/src/__support/CPP/Functional.h -libc/src/__support/CPP/Limits.h -libc/src/__support/CPP/Utility.h -libc/src/__support/CPP/vector.h -libc/src/__support/File/file.cpp -libc/src/__support/File/file.h -libc/src/__support/FPUtil/BasicOperations.h -libc/src/__support/FPUtil/DivisionAndRemainderOperations.h -libc/src/__support/FPUtil/FEnvImpl.h -libc/src/__support/FPUtil/FloatProperties.h -libc/src/__support/FPUtil/FMA.h -libc/src/__support/FPUtil/FPBits.h -libc/src/__support/FPUtil/ManipulationFunctions.h -libc/src/__support/FPUtil/NearestIntegerOperations.h -libc/src/__support/FPUtil/NormalFloat.h -libc/src/__support/FPUtil/PlatformDefs.h -libc/src/__support/FPUtil/PolyEval.h -libc/src/__support/FPUtil/sqrt.h -libc/src/__support/FPUtil/UInt.h -libc/src/__support/FPUtil/XFloat.h -libc/src/__support/FPUtil/aarch64/FEnvImpl.h -libc/src/__support/FPUtil/aarch64/FMA.h -libc/src/__support/FPUtil/aarch64/sqrt.h -libc/src/__support/FPUtil/generic/FMA.h -libc/src/__support/FPUtil/generic/sqrt.h -libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h -libc/src/__support/FPUtil/x86_64/FEnvImpl.h -libc/src/__support/FPUtil/x86_64/FMA.h -libc/src/__support/FPUtil/x86_64/NextAfterLongDouble.h -libc/src/__support/FPUtil/x86_64/PolyEval.h -libc/src/__support/FPUtil/x86_64/sqrt.h -libc/src/__support/OSUtil/io.h -libc/src/__support/OSUtil/quick_exit.h -libc/src/__support/OSUtil/syscall.h -libc/src/__support/OSUtil/linux/io.h -libc/src/__support/OSUtil/linux/syscall.h -libc/src/__support/OSUtil/linux/aarch64/syscall.h -libc/src/__support/OSUtil/linux/x86_64/syscall.h -libc/src/__support/threads/mutex.h -libc/src/__support/threads/linux/mutex.h -libc/utils/HdrGen/Command.cpp -libc/utils/HdrGen/Command.h -libc/utils/HdrGen/Generator.cpp -libc/utils/HdrGen/Generator.h -libc/utils/HdrGen/IncludeFileCommand.cpp -libc/utils/HdrGen/IncludeFileCommand.h -libc/utils/HdrGen/Main.cpp -libc/utils/HdrGen/PublicAPICommand.cpp -libc/utils/HdrGen/PublicAPICommand.h -libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp -libc/utils/LibcTableGenUtil/APIIndexer.cpp -libc/utils/LibcTableGenUtil/APIIndexer.h -libc/utils/MPFRWrapper/check_mpfr.cpp -libc/utils/MPFRWrapper/MPFRUtils.cpp -libc/utils/MPFRWrapper/MPFRUtils.h -libc/utils/testutils/ExecuteFunction.h -libc/utils/testutils/ExecuteFunctionUnix.cpp -libc/utils/testutils/FDReader.h -libc/utils/testutils/FDReaderUnix.cpp -libc/utils/testutils/RandUtils.cpp -libc/utils/testutils/RandUtils.h -libc/utils/testutils/StreamWrapper.h -libc/utils/testutils/Timer.cpp -libc/utils/testutils/Timer.h -libc/utils/tools/WrapperGen/Main.cpp -libc/utils/UnitTest/FPExceptMatcher.cpp -libc/utils/UnitTest/FPExceptMatcher.h -libc/utils/UnitTest/FPMatcher.cpp -libc/utils/UnitTest/FPMatcher.h -libc/utils/UnitTest/FuchsiaTest.h -libc/utils/UnitTest/LibcTest.cpp -libc/utils/UnitTest/LibcTestMain.cpp -libc/utils/UnitTest/MemoryMatcher.cpp -libc/utils/UnitTest/MemoryMatcher.h -libc/utils/UnitTest/PlatformDefs.h -libc/utils/UnitTest/Test.h -libclc/generic/include/config.h -libclc/generic/include/clc/as_type.h -libclc/generic/include/clc/clcfunc.h -libclc/generic/include/clc/async/async_work_group_copy.h -libclc/generic/include/clc/async/async_work_group_strided_copy.h -libclc/generic/include/clc/async/prefetch.h -libclc/generic/include/clc/async/wait_group_events.h -libclc/generic/include/clc/atomic/atomic_add.h -libclc/generic/include/clc/atomic/atomic_and.h -libclc/generic/include/clc/atomic/atomic_max.h -libclc/generic/include/clc/atomic/atomic_min.h -libclc/generic/include/clc/atomic/atomic_or.h -libclc/generic/include/clc/atomic/atomic_sub.h -libclc/generic/include/clc/atomic/atomic_xor.h -libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_add.h -libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_dec.h -libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_inc.h -libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_sub.h -libclc/generic/include/clc/cl_khr_global_int32_base_atomics/atom_xchg.h -libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_and.h -libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_max.h -libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_min.h -libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_or.h -libclc/generic/include/clc/cl_khr_global_int32_extended_atomics/atom_xor.h -libclc/generic/include/clc/cl_khr_int64_base_atomics/atom_add.h -libclc/generic/include/clc/cl_khr_int64_base_atomics/atom_sub.h -libclc/generic/include/clc/cl_khr_int64_base_atomics/atom_xchg.h -libclc/generic/include/clc/cl_khr_int64_extended_atomics/atom_and.h -libclc/generic/include/clc/cl_khr_int64_extended_atomics/atom_max.h -libclc/generic/include/clc/cl_khr_int64_extended_atomics/atom_min.h -libclc/generic/include/clc/cl_khr_int64_extended_atomics/atom_or.h -libclc/generic/include/clc/cl_khr_int64_extended_atomics/atom_xor.h -libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_add.h -libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_dec.h -libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_inc.h -libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_sub.h -libclc/generic/include/clc/cl_khr_local_int32_base_atomics/atom_xchg.h -libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_and.h -libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_max.h -libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_min.h -libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_or.h -libclc/generic/include/clc/cl_khr_local_int32_extended_atomics/atom_xor.h -libclc/generic/include/clc/common/degrees.h -libclc/generic/include/clc/common/mix.h -libclc/generic/include/clc/common/radians.h -libclc/generic/include/clc/common/sign.h -libclc/generic/include/clc/common/smoothstep.h -libclc/generic/include/clc/common/step.h -libclc/generic/include/clc/explicit_fence/explicit_memory_fence.h -libclc/generic/include/clc/geometric/cross.h -libclc/generic/include/clc/geometric/distance.h -libclc/generic/include/clc/geometric/dot.h -libclc/generic/include/clc/geometric/fast_distance.h -libclc/generic/include/clc/geometric/fast_length.h -libclc/generic/include/clc/geometric/fast_normalize.h -libclc/generic/include/clc/geometric/length.h -libclc/generic/include/clc/geometric/normalize.h -libclc/generic/include/clc/integer/abs.h -libclc/generic/include/clc/integer/abs_diff.h -libclc/generic/include/clc/integer/add_sat.h -libclc/generic/include/clc/integer/clz.h -libclc/generic/include/clc/integer/hadd.h -libclc/generic/include/clc/integer/mad24.h -libclc/generic/include/clc/integer/mad_sat.h -libclc/generic/include/clc/integer/mul24.h -libclc/generic/include/clc/integer/mul_hi.h -libclc/generic/include/clc/integer/popcount.h -libclc/generic/include/clc/integer/rhadd.h -libclc/generic/include/clc/integer/rotate.h -libclc/generic/include/clc/integer/sub_sat.h -libclc/generic/include/clc/math/acos.h -libclc/generic/include/clc/math/acosh.h -libclc/generic/include/clc/math/acospi.h -libclc/generic/include/clc/math/asin.h -libclc/generic/include/clc/math/asinh.h -libclc/generic/include/clc/math/asinpi.h -libclc/generic/include/clc/math/atan.h -libclc/generic/include/clc/math/atan2.h -libclc/generic/include/clc/math/atan2pi.h -libclc/generic/include/clc/math/atanh.h -libclc/generic/include/clc/math/atanpi.h -libclc/generic/include/clc/math/cbrt.h -libclc/generic/include/clc/math/ceil.h -libclc/generic/include/clc/math/copysign.h -libclc/generic/include/clc/math/cos.h -libclc/generic/include/clc/math/cosh.h -libclc/generic/include/clc/math/cospi.h -libclc/generic/include/clc/math/erf.h -libclc/generic/include/clc/math/erfc.h -libclc/generic/include/clc/math/exp.h -libclc/generic/include/clc/math/exp10.h -libclc/generic/include/clc/math/exp2.h -libclc/generic/include/clc/math/expm1.h -libclc/generic/include/clc/math/fabs.h -libclc/generic/include/clc/math/fdim.h -libclc/generic/include/clc/math/floor.h -libclc/generic/include/clc/math/fma.h -libclc/generic/include/clc/math/fmod.h -libclc/generic/include/clc/math/fract.h -libclc/generic/include/clc/math/frexp.h -libclc/generic/include/clc/math/half_cos.h -libclc/generic/include/clc/math/half_divide.h -libclc/generic/include/clc/math/half_exp.h -libclc/generic/include/clc/math/half_exp10.h -libclc/generic/include/clc/math/half_exp2.h -libclc/generic/include/clc/math/half_log.h -libclc/generic/include/clc/math/half_log10.h -libclc/generic/include/clc/math/half_log2.h -libclc/generic/include/clc/math/half_powr.h -libclc/generic/include/clc/math/half_recip.h -libclc/generic/include/clc/math/half_rsqrt.h -libclc/generic/include/clc/math/half_sin.h -libclc/generic/include/clc/math/half_sqrt.h -libclc/generic/include/clc/math/half_tan.h -libclc/generic/include/clc/math/hypot.h -libclc/generic/include/clc/math/ilogb.h -libclc/generic/include/clc/math/ldexp.h -libclc/generic/include/clc/math/lgamma.h -libclc/generic/include/clc/math/lgamma_r.h -libclc/generic/include/clc/math/log.h -libclc/generic/include/clc/math/log10.h -libclc/generic/include/clc/math/log1p.h -libclc/generic/include/clc/math/log2.h -libclc/generic/include/clc/math/logb.h -libclc/generic/include/clc/math/mad.h -libclc/generic/include/clc/math/maxmag.h -libclc/generic/include/clc/math/minmag.h -libclc/generic/include/clc/math/modf.h -libclc/generic/include/clc/math/native_cos.h -libclc/generic/include/clc/math/native_divide.h -libclc/generic/include/clc/math/native_exp.h -libclc/generic/include/clc/math/native_exp10.h -libclc/generic/include/clc/math/native_exp2.h -libclc/generic/include/clc/math/native_log.h -libclc/generic/include/clc/math/native_log10.h -libclc/generic/include/clc/math/native_log2.h -libclc/generic/include/clc/math/native_powr.h -libclc/generic/include/clc/math/native_recip.h -libclc/generic/include/clc/math/native_rsqrt.h -libclc/generic/include/clc/math/native_sin.h -libclc/generic/include/clc/math/native_sqrt.h -libclc/generic/include/clc/math/native_tan.h -libclc/generic/include/clc/math/nextafter.h -libclc/generic/include/clc/math/pow.h -libclc/generic/include/clc/math/pown.h -libclc/generic/include/clc/math/powr.h -libclc/generic/include/clc/math/remainder.h -libclc/generic/include/clc/math/remquo.h -libclc/generic/include/clc/math/rint.h -libclc/generic/include/clc/math/rootn.h -libclc/generic/include/clc/math/round.h -libclc/generic/include/clc/math/rsqrt.h -libclc/generic/include/clc/math/sin.h -libclc/generic/include/clc/math/sincos.h -libclc/generic/include/clc/math/sinh.h -libclc/generic/include/clc/math/sinpi.h -libclc/generic/include/clc/math/sqrt.h -libclc/generic/include/clc/math/tan.h -libclc/generic/include/clc/math/tanh.h -libclc/generic/include/clc/math/tanpi.h -libclc/generic/include/clc/math/tgamma.h -libclc/generic/include/clc/math/trunc.h -libclc/generic/include/clc/relational/bitselect.h -libclc/generic/include/clc/relational/isfinite.h -libclc/generic/include/clc/relational/isgreater.h -libclc/generic/include/clc/relational/isgreaterequal.h -libclc/generic/include/clc/relational/isless.h -libclc/generic/include/clc/relational/islessequal.h -libclc/generic/include/clc/relational/islessgreater.h -libclc/generic/include/clc/relational/isnormal.h -libclc/generic/include/clc/relational/isnotequal.h -libclc/generic/include/clc/relational/isordered.h -libclc/generic/include/clc/relational/isunordered.h -libclc/generic/include/clc/relational/signbit.h -libclc/generic/include/clc/shared/clamp.h -libclc/generic/include/clc/shared/max.h -libclc/generic/include/clc/shared/min.h -libclc/generic/include/clc/synchronization/barrier.h -libclc/generic/include/clc/synchronization/cl_mem_fence_flags.h -libclc/generic/include/clc/workitem/get_global_id.h -libclc/generic/include/clc/workitem/get_global_offset.h -libclc/generic/include/clc/workitem/get_global_size.h -libclc/generic/include/clc/workitem/get_group_id.h -libclc/generic/include/clc/workitem/get_local_id.h -libclc/generic/include/clc/workitem/get_local_size.h -libclc/generic/include/clc/workitem/get_num_groups.h -libclc/generic/include/clc/workitem/get_work_dim.h -libclc/generic/include/integer/popcount.h -libclc/generic/include/math/clc_exp10.h -libclc/generic/include/math/clc_fma.h -libclc/generic/include/math/clc_fmod.h -libclc/generic/include/math/clc_hypot.h -libclc/generic/include/math/clc_ldexp.h -libclc/generic/include/math/clc_nextafter.h -libclc/generic/include/math/clc_pow.h -libclc/generic/include/math/clc_pown.h -libclc/generic/include/math/clc_powr.h -libclc/generic/include/math/clc_remainder.h -libclc/generic/include/math/clc_remquo.h -libclc/generic/include/math/clc_rootn.h -libclc/generic/include/math/clc_sqrt.h -libclc/generic/include/math/clc_tan.h -libclc/generic/include/math/clc_tanpi.h -libclc/generic/lib/math/ep_log.h -libcxx/benchmarks/format.bench.cpp -libcxx/benchmarks/formatted_size.bench.cpp -libcxx/benchmarks/formatter_float.bench.cpp -libcxx/benchmarks/format_to.bench.cpp -libcxx/benchmarks/format_to_n.bench.cpp -libcxx/benchmarks/to_chars.bench.cpp -libcxx/benchmarks/util_smartptr.bench.cpp -libcxx/benchmarks/variant_visit_1.bench.cpp -libcxx/benchmarks/variant_visit_2.bench.cpp -libcxx/benchmarks/variant_visit_3.bench.cpp -libcxx/include/__algorithm/adjacent_find.h -libcxx/include/__algorithm/all_of.h -libcxx/include/__algorithm/any_of.h -libcxx/include/__algorithm/count.h -libcxx/include/__algorithm/count_if.h -libcxx/include/__algorithm/find.h -libcxx/include/__algorithm/find_first_of.h -libcxx/include/__algorithm/find_if.h -libcxx/include/__algorithm/find_if_not.h -libcxx/include/__algorithm/for_each.h -libcxx/include/__algorithm/for_each_n.h -libcxx/include/__algorithm/iter_swap.h -libcxx/include/__algorithm/mismatch.h -libcxx/include/__algorithm/none_of.h -libcxx/include/__algorithm/swap_ranges.h -libcxx/include/__compare/is_eq.h -libcxx/include/__filesystem/file_time_type.h -libcxx/include/__filesystem/file_type.h -libcxx/include/__filesystem/space_info.h -libcxx/include/__format/formatter_floating_point.h -libcxx/include/__format/formatter_pointer.h -libcxx/include/__memory/voidify.h -libcxx/include/__numeric/exclusive_scan.h -libcxx/include/__numeric/inclusive_scan.h -libcxx/include/__numeric/reduce.h -libcxx/include/__numeric/transform_reduce.h -libcxx/include/__random/default_random_engine.h -libcxx/include/__random/knuth_b.h -libcxx/include/__ranges/dangling.h -libcxx/include/__ranges/enable_borrowed_range.h -libcxx/include/__support/ibm/gettod_zos.h -libcxx/include/__support/ibm/nanosleep.h -libcxx/include/__support/openbsd/xlocale.h -libcxx/include/__support/solaris/floatingpoint.h -libcxx/include/__support/solaris/wchar.h -libcxx/include/__utility/auto_cast.h -libcxx/include/__utility/declval.h -libcxx/include/__utility/forward.h -libcxx/include/__utility/move.h -libcxx/include/__utility/swap.h -libcxx/src/chrono_system_time_init.h -libcxx/src/format.cpp -libcxx/src/ios.instantiations.cpp -libcxx/src/iostream_init.h -libcxx/src/legacy_pointer_safety.cpp -libcxx/src/utility.cpp -libcxx/src/experimental/memory_resource_init_helper.h -libcxx/src/include/to_chars_floating_point.h -libcxx/src/include/ryu/common.h -libcxx/src/include/ryu/d2fixed.h -libcxx/src/include/ryu/d2fixed_full_table.h -libcxx/src/include/ryu/d2s.h -libcxx/src/include/ryu/d2s_full_table.h -libcxx/src/include/ryu/d2s_intrinsics.h -libcxx/src/include/ryu/digit_table.h -libcxx/src/include/ryu/f2s.h -libcxx/src/ryu/d2fixed.cpp -libcxx/src/ryu/d2s.cpp -libcxx/src/ryu/f2s.cpp -libcxxabi/src/cxa_guard_impl.h -libcxxabi/src/demangle/Utility.h -libunwind/src/cet_unwind.h -lld/COFF/CallGraphSort.cpp -lld/COFF/CallGraphSort.h -lld/COFF/COFFLinkerContext.cpp -lld/COFF/COFFLinkerContext.h -lld/COFF/DebugTypes.cpp -lld/COFF/DLL.h -lld/COFF/ICF.h -lld/COFF/MarkLive.h -lld/COFF/MinGW.h -lld/COFF/SymbolTable.cpp -lld/COFF/SymbolTable.h -lld/COFF/TypeMerger.h -lld/COFF/Writer.h -lld/Common/Args.cpp -lld/Common/CommonLinkerContext.cpp -lld/Common/DWARF.cpp -lld/Common/Memory.cpp -lld/Common/Reproduce.cpp -lld/Common/Strings.cpp -lld/Common/TargetOptionsCommandFlags.cpp -lld/Common/Timer.cpp -lld/Common/Version.cpp -lld/ELF/AArch64ErrataFix.h -lld/ELF/ARMErrataFix.h -lld/ELF/CallGraphSort.cpp -lld/ELF/CallGraphSort.h -lld/ELF/Driver.h -lld/ELF/DWARF.cpp -lld/ELF/DWARF.h -lld/ELF/EhFrame.h -lld/ELF/ICF.h -lld/ELF/LinkerScript.cpp -lld/ELF/LTO.h -lld/ELF/MapFile.h -lld/ELF/MarkLive.cpp -lld/ELF/MarkLive.h -lld/ELF/OutputSections.h -lld/ELF/Relocations.h -lld/ELF/ScriptLexer.cpp -lld/ELF/ScriptLexer.h -lld/ELF/ScriptParser.h -lld/ELF/Symbols.cpp -lld/ELF/Symbols.h -lld/ELF/SymbolTable.cpp -lld/ELF/SymbolTable.h -lld/ELF/Target.cpp -lld/ELF/Writer.h -lld/ELF/Arch/AVR.cpp -lld/ELF/Arch/MipsArchTree.cpp -lld/ELF/Arch/MSP430.cpp -lld/ELF/Arch/SPARCV9.cpp -lld/include/lld/Common/Args.h -lld/include/lld/Common/Arrays.h -lld/include/lld/Common/CommonLinkerContext.h -lld/include/lld/Common/Driver.h -lld/include/lld/Common/DWARF.h -lld/include/lld/Common/Filesystem.h -lld/include/lld/Common/Strings.h -lld/include/lld/Common/Timer.h -lld/include/lld/Core/Pass.h -lld/include/lld/Core/SharedLibraryAtom.h -lld/include/lld/Core/UndefinedAtom.h -lld/include/lld/Core/Writer.h -lld/MachO/ConcatOutputSection.cpp -lld/MachO/ConcatOutputSection.h -lld/MachO/Config.h -lld/MachO/Driver.cpp -lld/MachO/Driver.h -lld/MachO/DriverUtils.cpp -lld/MachO/Dwarf.cpp -lld/MachO/Dwarf.h -lld/MachO/ExportTrie.cpp -lld/MachO/ExportTrie.h -lld/MachO/ICF.cpp -lld/MachO/ICF.h -lld/MachO/InputFiles.cpp -lld/MachO/InputFiles.h -lld/MachO/InputSection.cpp -lld/MachO/InputSection.h -lld/MachO/LTO.cpp -lld/MachO/LTO.h -lld/MachO/MachOStructs.h -lld/MachO/MapFile.cpp -lld/MachO/MapFile.h -lld/MachO/MarkLive.cpp -lld/MachO/MarkLive.h -lld/MachO/ObjC.cpp -lld/MachO/ObjC.h -lld/MachO/OutputSection.h -lld/MachO/OutputSegment.cpp -lld/MachO/OutputSegment.h -lld/MachO/Relocations.cpp -lld/MachO/Relocations.h -lld/MachO/SectionPriorities.cpp -lld/MachO/SectionPriorities.h -lld/MachO/Symbols.cpp -lld/MachO/Symbols.h -lld/MachO/SymbolTable.cpp -lld/MachO/SymbolTable.h -lld/MachO/SyntheticSections.h -lld/MachO/Target.cpp -lld/MachO/Target.h -lld/MachO/UnwindInfoSection.cpp -lld/MachO/UnwindInfoSection.h -lld/MachO/Writer.cpp -lld/MachO/Writer.h -lld/MachO/Arch/ARM.cpp -lld/MachO/Arch/ARM64.cpp -lld/MachO/Arch/ARM64Common.cpp -lld/MachO/Arch/ARM64Common.h -lld/MachO/Arch/ARM64_32.cpp -lld/MachO/Arch/X86_64.cpp -lld/MinGW/Driver.cpp -lld/tools/lld/lld.cpp -lld/wasm/Config.h -lld/wasm/InputChunks.h -lld/wasm/InputElement.h -lld/wasm/InputFiles.h -lld/wasm/MapFile.cpp -lld/wasm/MapFile.h -lld/wasm/MarkLive.cpp -lld/wasm/MarkLive.h -lld/wasm/OutputSections.cpp -lld/wasm/OutputSections.h -lld/wasm/OutputSegment.cpp -lld/wasm/OutputSegment.h -lld/wasm/Relocations.h -lld/wasm/Writer.h -lld/wasm/WriterUtils.h -lldb/bindings/python/python-typemaps.h -lldb/examples/darwin/heap_find/heap/heap_find.cpp -lldb/examples/plugins/commands/fooplugin.cpp -lldb/examples/synthetic/bitfield/program.cpp -lldb/include/lldb/lldb-defines.h -lldb/include/lldb/lldb-forward.h -lldb/include/lldb/lldb-private.h -lldb/include/lldb/lldb-public.h -lldb/include/lldb/lldb-versioning.h -lldb/include/lldb/API/LLDB.h -lldb/include/lldb/API/SBAddress.h -lldb/include/lldb/API/SBAttachInfo.h -lldb/include/lldb/API/SBBlock.h -lldb/include/lldb/API/SBBroadcaster.h -lldb/include/lldb/API/SBCommandInterpreter.h -lldb/include/lldb/API/SBCommandInterpreterRunOptions.h -lldb/include/lldb/API/SBCommandReturnObject.h -lldb/include/lldb/API/SBCommunication.h -lldb/include/lldb/API/SBCompileUnit.h -lldb/include/lldb/API/SBData.h -lldb/include/lldb/API/SBDebugger.h -lldb/include/lldb/API/SBDeclaration.h -lldb/include/lldb/API/SBEnvironment.h -lldb/include/lldb/API/SBError.h -lldb/include/lldb/API/SBEvent.h -lldb/include/lldb/API/SBExecutionContext.h -lldb/include/lldb/API/SBExpressionOptions.h -lldb/include/lldb/API/SBFile.h -lldb/include/lldb/API/SBFileSpec.h -lldb/include/lldb/API/SBFileSpecList.h -lldb/include/lldb/API/SBFunction.h -lldb/include/lldb/API/SBHostOS.h -lldb/include/lldb/API/SBInstruction.h -lldb/include/lldb/API/SBLanguageRuntime.h -lldb/include/lldb/API/SBLaunchInfo.h -lldb/include/lldb/API/SBLineEntry.h -lldb/include/lldb/API/SBListener.h -lldb/include/lldb/API/SBMemoryRegionInfo.h -lldb/include/lldb/API/SBMemoryRegionInfoList.h -lldb/include/lldb/API/SBModule.h -lldb/include/lldb/API/SBModuleSpec.h -lldb/include/lldb/API/SBPlatform.h -lldb/include/lldb/API/SBProcess.h -lldb/include/lldb/API/SBProcessInfo.h -lldb/include/lldb/API/SBQueue.h -lldb/include/lldb/API/SBQueueItem.h -lldb/include/lldb/API/SBReproducer.h -lldb/include/lldb/API/SBSourceManager.h -lldb/include/lldb/API/SBStream.h -lldb/include/lldb/API/SBStringList.h -lldb/include/lldb/API/SBSymbol.h -lldb/include/lldb/API/SBSymbolContext.h -lldb/include/lldb/API/SBSymbolContextList.h -lldb/include/lldb/API/SBThread.h -lldb/include/lldb/API/SBThreadCollection.h -lldb/include/lldb/API/SBTrace.h -lldb/include/lldb/API/SBType.h -lldb/include/lldb/API/SBTypeCategory.h -lldb/include/lldb/API/SBTypeEnumMember.h -lldb/include/lldb/API/SBTypeFilter.h -lldb/include/lldb/API/SBTypeFormat.h -lldb/include/lldb/API/SBTypeNameSpecifier.h -lldb/include/lldb/API/SBTypeSummary.h -lldb/include/lldb/API/SBTypeSynthetic.h -lldb/include/lldb/API/SBUnixSignals.h -lldb/include/lldb/API/SBValue.h -lldb/include/lldb/API/SBValueList.h -lldb/include/lldb/API/SBVariablesOptions.h -lldb/include/lldb/API/SBWatchpoint.h -lldb/include/lldb/Breakpoint/BreakpointID.h -lldb/include/lldb/Breakpoint/BreakpointList.h -lldb/include/lldb/Breakpoint/BreakpointLocationList.h -lldb/include/lldb/Breakpoint/BreakpointPrecondition.h -lldb/include/lldb/Breakpoint/BreakpointResolverFileLine.h -lldb/include/lldb/Breakpoint/BreakpointSiteList.h -lldb/include/lldb/Breakpoint/Stoppoint.h -lldb/include/lldb/Breakpoint/WatchpointList.h -lldb/include/lldb/Breakpoint/WatchpointOptions.h -lldb/include/lldb/Core/AddressResolver.h -lldb/include/lldb/Core/AddressResolverFileLine.h -lldb/include/lldb/Core/DataFileCache.h -lldb/include/lldb/Core/Debugger.h -lldb/include/lldb/Core/Declaration.h -lldb/include/lldb/Core/DumpRegisterValue.h -lldb/include/lldb/Core/EmulateInstruction.h -lldb/include/lldb/Core/Highlighter.h -lldb/include/lldb/Core/IOHandlerCursesGUI.h -lldb/include/lldb/Core/LoadedModuleInfoList.h -lldb/include/lldb/Core/MappedHash.h -lldb/include/lldb/Core/ModuleChild.h -lldb/include/lldb/Core/Opcode.h -lldb/include/lldb/Core/PluginInterface.h -lldb/include/lldb/Core/PluginManager.h -lldb/include/lldb/Core/Progress.h -lldb/include/lldb/Core/RichManglingContext.h -lldb/include/lldb/Core/Section.h -lldb/include/lldb/Core/SourceLocationSpec.h -lldb/include/lldb/Core/SourceManager.h -lldb/include/lldb/Core/StreamAsynchronousIO.h -lldb/include/lldb/Core/StreamBuffer.h -lldb/include/lldb/Core/StreamFile.h -lldb/include/lldb/Core/ThreadSafeValue.h -lldb/include/lldb/Core/UniqueCStringMap.h -lldb/include/lldb/Core/ValueObjectConstResultCast.h -lldb/include/lldb/Core/ValueObjectDynamicValue.h -lldb/include/lldb/Core/ValueObjectMemory.h -lldb/include/lldb/Core/ValueObjectRegister.h -lldb/include/lldb/Core/ValueObjectSyntheticFilter.h -lldb/include/lldb/Core/ValueObjectUpdater.h -lldb/include/lldb/DataFormatters/CXXFunctionPointer.h -lldb/include/lldb/DataFormatters/DumpValueObjectOptions.h -lldb/include/lldb/DataFormatters/FormattersContainer.h -lldb/include/lldb/DataFormatters/FormattersHelpers.h -lldb/include/lldb/DataFormatters/TypeCategory.h -lldb/include/lldb/DataFormatters/TypeCategoryMap.h -lldb/include/lldb/DataFormatters/TypeSummary.h -lldb/include/lldb/DataFormatters/ValueObjectPrinter.h -lldb/include/lldb/DataFormatters/VectorIterator.h -lldb/include/lldb/DataFormatters/VectorType.h -lldb/include/lldb/Expression/DWARFExpression.h -lldb/include/lldb/Expression/DynamicCheckerFunctions.h -lldb/include/lldb/Expression/ExpressionSourceCode.h -lldb/include/lldb/Expression/ExpressionTypeSystemHelper.h -lldb/include/lldb/Expression/Materializer.h -lldb/include/lldb/Expression/REPL.h -lldb/include/lldb/Expression/UtilityFunction.h -lldb/include/lldb/Host/ConnectionFileDescriptor.h -lldb/include/lldb/Host/File.h -lldb/include/lldb/Host/FileAction.h -lldb/include/lldb/Host/FileSystem.h -lldb/include/lldb/Host/Host.h -lldb/include/lldb/Host/HostGetOpt.h -lldb/include/lldb/Host/HostInfo.h -lldb/include/lldb/Host/HostNativeProcess.h -lldb/include/lldb/Host/HostNativeThread.h -lldb/include/lldb/Host/LockFile.h -lldb/include/lldb/Host/MainLoop.h -lldb/include/lldb/Host/MainLoopBase.h -lldb/include/lldb/Host/Pipe.h -lldb/include/lldb/Host/ProcessRunLock.h -lldb/include/lldb/Host/PseudoTerminal.h -lldb/include/lldb/Host/SafeMachO.h -lldb/include/lldb/Host/Socket.h -lldb/include/lldb/Host/Terminal.h -lldb/include/lldb/Host/Time.h -lldb/include/lldb/Host/XML.h -lldb/include/lldb/Host/android/HostInfoAndroid.h -lldb/include/lldb/Host/common/GetOptInc.h -lldb/include/lldb/Host/common/NativeRegisterContext.h -lldb/include/lldb/Host/linux/Host.h -lldb/include/lldb/Host/linux/Ptrace.h -lldb/include/lldb/Host/linux/Support.h -lldb/include/lldb/Host/linux/Uio.h -lldb/include/lldb/Host/posix/ConnectionFileDescriptorPosix.h -lldb/include/lldb/Host/posix/Fcntl.h -lldb/include/lldb/Host/posix/HostProcessPosix.h -lldb/include/lldb/Host/posix/HostThreadPosix.h -lldb/include/lldb/Host/posix/LockFilePosix.h -lldb/include/lldb/Host/posix/PipePosix.h -lldb/include/lldb/Host/posix/ProcessLauncherPosixFork.h -lldb/include/lldb/Host/windows/LockFileWindows.h -lldb/include/lldb/Host/windows/PipeWindows.h -lldb/include/lldb/Host/windows/PosixApi.h -lldb/include/lldb/Host/windows/windows.h -lldb/include/lldb/Initialization/SystemInitializerCommon.h -lldb/include/lldb/Interpreter/CommandAlias.h -lldb/include/lldb/Interpreter/CommandCompletions.h -lldb/include/lldb/Interpreter/CommandHistory.h -lldb/include/lldb/Interpreter/CommandOptionValidators.h -lldb/include/lldb/Interpreter/OptionArgParser.h -lldb/include/lldb/Interpreter/OptionGroupArchitecture.h -lldb/include/lldb/Interpreter/OptionGroupBoolean.h -lldb/include/lldb/Interpreter/OptionGroupFile.h -lldb/include/lldb/Interpreter/OptionGroupFormat.h -lldb/include/lldb/Interpreter/OptionGroupOutputFile.h -lldb/include/lldb/Interpreter/OptionGroupPlatform.h -lldb/include/lldb/Interpreter/OptionGroupString.h -lldb/include/lldb/Interpreter/OptionGroupUInt64.h -lldb/include/lldb/Interpreter/OptionGroupUUID.h -lldb/include/lldb/Interpreter/OptionGroupValueObjectDisplay.h -lldb/include/lldb/Interpreter/OptionGroupWatchpoint.h -lldb/include/lldb/Interpreter/Options.h -lldb/include/lldb/Interpreter/OptionValueArch.h -lldb/include/lldb/Interpreter/OptionValueArgs.h -lldb/include/lldb/Interpreter/OptionValueArray.h -lldb/include/lldb/Interpreter/OptionValueBoolean.h -lldb/include/lldb/Interpreter/OptionValueChar.h -lldb/include/lldb/Interpreter/OptionValueEnumeration.h -lldb/include/lldb/Interpreter/OptionValueFileSpec.h -lldb/include/lldb/Interpreter/OptionValueFileSpecList.h -lldb/include/lldb/Interpreter/OptionValueFormatEntity.h -lldb/include/lldb/Interpreter/OptionValueLanguage.h -lldb/include/lldb/Interpreter/OptionValuePathMappings.h -lldb/include/lldb/Interpreter/OptionValueRegex.h -lldb/include/lldb/Interpreter/OptionValues.h -lldb/include/lldb/Interpreter/OptionValueSInt64.h -lldb/include/lldb/Interpreter/OptionValueString.h -lldb/include/lldb/Interpreter/OptionValueUInt64.h -lldb/include/lldb/Interpreter/Property.h -lldb/include/lldb/Interpreter/ScriptedInterface.h -lldb/include/lldb/Interpreter/ScriptedProcessInterface.h -lldb/include/lldb/Symbol/ArmUnwindInfo.h -lldb/include/lldb/Symbol/CompilerDecl.h -lldb/include/lldb/Symbol/CompilerDeclContext.h -lldb/include/lldb/Symbol/DeclVendor.h -lldb/include/lldb/Symbol/Function.h -lldb/include/lldb/Symbol/LocateSymbolFile.h -lldb/include/lldb/Symbol/SourceModule.h -lldb/include/lldb/Symbol/SymbolContextScope.h -lldb/include/lldb/Symbol/SymbolVendor.h -lldb/include/lldb/Symbol/Type.h -lldb/include/lldb/Symbol/TypeList.h -lldb/include/lldb/Symbol/TypeMap.h -lldb/include/lldb/Symbol/UnwindTable.h -lldb/include/lldb/Symbol/Variable.h -lldb/include/lldb/Target/AppleArm64ExceptionClass.h -lldb/include/lldb/Target/AssertFrameRecognizer.h -lldb/include/lldb/Target/DynamicRegisterInfo.h -lldb/include/lldb/Target/ExecutionContextScope.h -lldb/include/lldb/Target/InstrumentationRuntime.h -lldb/include/lldb/Target/InstrumentationRuntimeStopInfo.h -lldb/include/lldb/Target/JITLoader.h -lldb/include/lldb/Target/JITLoaderList.h -lldb/include/lldb/Target/MemoryTagManager.h -lldb/include/lldb/Target/MemoryTagMap.h -lldb/include/lldb/Target/ModuleCache.h -lldb/include/lldb/Target/OperatingSystem.h -lldb/include/lldb/Target/PostMortemProcess.h -lldb/include/lldb/Target/ProcessTrace.h -lldb/include/lldb/Target/Queue.h -lldb/include/lldb/Target/QueueItem.h -lldb/include/lldb/Target/QueueList.h -lldb/include/lldb/Target/RegisterCheckpoint.h -lldb/include/lldb/Target/RegisterContext.h -lldb/include/lldb/Target/RegisterNumber.h -lldb/include/lldb/Target/Runtime.h -lldb/include/lldb/Target/SectionLoadHistory.h -lldb/include/lldb/Target/StackFrame.h -lldb/include/lldb/Target/StackFrameList.h -lldb/include/lldb/Target/StackID.h -lldb/include/lldb/Target/Statistics.h -lldb/include/lldb/Target/StopInfo.h -lldb/include/lldb/Target/SystemRuntime.h -lldb/include/lldb/Target/TargetList.h -lldb/include/lldb/Target/ThreadCollection.h -lldb/include/lldb/Target/ThreadList.h -lldb/include/lldb/Target/ThreadPlan.h -lldb/include/lldb/Target/ThreadPlanBase.h -lldb/include/lldb/Target/ThreadPlanCallFunction.h -lldb/include/lldb/Target/ThreadPlanCallFunctionUsingABI.h -lldb/include/lldb/Target/ThreadPlanRunToAddress.h -lldb/include/lldb/Target/ThreadPlanShouldStopHere.h -lldb/include/lldb/Target/ThreadPlanStepInstruction.h -lldb/include/lldb/Target/ThreadPlanStepOut.h -lldb/include/lldb/Target/ThreadPlanStepOverBreakpoint.h -lldb/include/lldb/Target/ThreadPlanStepOverRange.h -lldb/include/lldb/Target/ThreadPlanStepThrough.h -lldb/include/lldb/Target/ThreadPlanStepUntil.h -lldb/include/lldb/Target/ThreadPlanTracer.h -lldb/include/lldb/Target/Trace.h -lldb/include/lldb/Target/TraceCursor.h -lldb/include/lldb/Target/TraceExporter.h -lldb/include/lldb/Target/TraceInstructionDumper.h -lldb/include/lldb/Target/Unwind.h -lldb/include/lldb/Target/UnwindAssembly.h -lldb/include/lldb/Target/UnwindLLDB.h -lldb/include/lldb/Utility/ArchSpec.h -lldb/include/lldb/Utility/Args.h -lldb/include/lldb/Utility/Baton.h -lldb/include/lldb/Utility/Broadcaster.h -lldb/include/lldb/Utility/CompletionRequest.h -lldb/include/lldb/Utility/ConstString.h -lldb/include/lldb/Utility/DataBuffer.h -lldb/include/lldb/Utility/DataBufferHeap.h -lldb/include/lldb/Utility/DataEncoder.h -lldb/include/lldb/Utility/FileSpec.h -lldb/include/lldb/Utility/Flags.h -lldb/include/lldb/Utility/GDBRemote.h -lldb/include/lldb/Utility/Instrumentation.h -lldb/include/lldb/Utility/IOObject.h -lldb/include/lldb/Utility/LLDBAssert.h -lldb/include/lldb/Utility/LLDBLog.h -lldb/include/lldb/Utility/Predicate.h -lldb/include/lldb/Utility/ProcessInfo.h -lldb/include/lldb/Utility/RangeMap.h -lldb/include/lldb/Utility/RegisterValue.h -lldb/include/lldb/Utility/RegularExpression.h -lldb/include/lldb/Utility/Reproducer.h -lldb/include/lldb/Utility/ReproducerProvider.h -lldb/include/lldb/Utility/SelectHelper.h -lldb/include/lldb/Utility/SharedCluster.h -lldb/include/lldb/Utility/State.h -lldb/include/lldb/Utility/StreamCallback.h -lldb/include/lldb/Utility/StreamString.h -lldb/include/lldb/Utility/StreamTee.h -lldb/include/lldb/Utility/StringExtractor.h -lldb/include/lldb/Utility/StringExtractorGDBRemote.h -lldb/include/lldb/Utility/StringLexer.h -lldb/include/lldb/Utility/StructuredData.h -lldb/include/lldb/Utility/TraceGDBRemotePackets.h -lldb/include/lldb/Utility/TraceIntelPTGDBRemotePackets.h -lldb/include/lldb/Utility/UnimplementedError.h -lldb/include/lldb/Utility/UriParser.h -lldb/include/lldb/Utility/UserID.h -lldb/include/lldb/Utility/UserIDResolver.h -lldb/include/lldb/Utility/VASPrintf.h -lldb/include/lldb/Utility/VMRange.h -lldb/include/lldb/Version/Version.h -lldb/source/API/SBAddress.cpp -lldb/source/API/SBAttachInfo.cpp -lldb/source/API/SBBroadcaster.cpp -lldb/source/API/SBCommandInterpreterRunOptions.cpp -lldb/source/API/SBCommunication.cpp -lldb/source/API/SBCompileUnit.cpp -lldb/source/API/SBDebugger.cpp -lldb/source/API/SBEnvironment.cpp -lldb/source/API/SBFile.cpp -lldb/source/API/SBFileSpec.cpp -lldb/source/API/SBFileSpecList.cpp -lldb/source/API/SBFunction.cpp -lldb/source/API/SBHostOS.cpp -lldb/source/API/SBLanguageRuntime.cpp -lldb/source/API/SBLaunchInfo.cpp -lldb/source/API/SBLineEntry.cpp -lldb/source/API/SBListener.cpp -lldb/source/API/SBModule.cpp -lldb/source/API/SBModuleSpec.cpp -lldb/source/API/SBProcessInfo.cpp -lldb/source/API/SBQueueItem.cpp -lldb/source/API/SBSection.cpp -lldb/source/API/SBStream.cpp -lldb/source/API/SBStringList.cpp -lldb/source/API/SBSymbol.cpp -lldb/source/API/SBSymbolContext.cpp -lldb/source/API/SBThreadPlan.cpp -lldb/source/API/SBTrace.cpp -lldb/source/API/SBTypeFilter.cpp -lldb/source/API/SBTypeFormat.cpp -lldb/source/API/SBUnixSignals.cpp -lldb/source/API/SBValueList.cpp -lldb/source/API/SBWatchpoint.cpp -lldb/source/API/SystemInitializerFull.cpp -lldb/source/API/SystemInitializerFull.h -lldb/source/API/Utils.h -lldb/source/Breakpoint/BreakpointList.cpp -lldb/source/Breakpoint/BreakpointPrecondition.cpp -lldb/source/Breakpoint/BreakpointResolverAddress.cpp -lldb/source/Breakpoint/BreakpointSiteList.cpp -lldb/source/Breakpoint/StoppointCallbackContext.cpp -lldb/source/Breakpoint/WatchpointList.cpp -lldb/source/Commands/CommandObjectApropos.cpp -lldb/source/Commands/CommandObjectApropos.h -lldb/source/Commands/CommandObjectBreakpoint.h -lldb/source/Commands/CommandObjectBreakpointCommand.cpp -lldb/source/Commands/CommandObjectBreakpointCommand.h -lldb/source/Commands/CommandObjectCommands.h -lldb/source/Commands/CommandObjectDisassemble.cpp -lldb/source/Commands/CommandObjectDisassemble.h -lldb/source/Commands/CommandObjectExpression.h -lldb/source/Commands/CommandObjectFrame.cpp -lldb/source/Commands/CommandObjectFrame.h -lldb/source/Commands/CommandObjectGUI.cpp -lldb/source/Commands/CommandObjectGUI.h -lldb/source/Commands/CommandObjectHelp.cpp -lldb/source/Commands/CommandObjectHelp.h -lldb/source/Commands/CommandObjectLanguage.h -lldb/source/Commands/CommandObjectLog.cpp -lldb/source/Commands/CommandObjectLog.h -lldb/source/Commands/CommandObjectMemory.cpp -lldb/source/Commands/CommandObjectMemory.h -lldb/source/Commands/CommandObjectMemoryTag.cpp -lldb/source/Commands/CommandObjectMemoryTag.h -lldb/source/Commands/CommandObjectPlatform.h -lldb/source/Commands/CommandObjectPlugin.cpp -lldb/source/Commands/CommandObjectPlugin.h -lldb/source/Commands/CommandObjectProcess.h -lldb/source/Commands/CommandObjectQuit.cpp -lldb/source/Commands/CommandObjectQuit.h -lldb/source/Commands/CommandObjectRegexCommand.cpp -lldb/source/Commands/CommandObjectRegexCommand.h -lldb/source/Commands/CommandObjectRegister.cpp -lldb/source/Commands/CommandObjectRegister.h -lldb/source/Commands/CommandObjectReproducer.cpp -lldb/source/Commands/CommandObjectReproducer.h -lldb/source/Commands/CommandObjectScript.cpp -lldb/source/Commands/CommandObjectScript.h -lldb/source/Commands/CommandObjectSession.cpp -lldb/source/Commands/CommandObjectSession.h -lldb/source/Commands/CommandObjectSettings.h -lldb/source/Commands/CommandObjectSource.h -lldb/source/Commands/CommandObjectStats.cpp -lldb/source/Commands/CommandObjectStats.h -lldb/source/Commands/CommandObjectTarget.h -lldb/source/Commands/CommandObjectThread.h -lldb/source/Commands/CommandObjectThreadUtil.cpp -lldb/source/Commands/CommandObjectThreadUtil.h -lldb/source/Commands/CommandObjectTrace.cpp -lldb/source/Commands/CommandObjectTrace.h -lldb/source/Commands/CommandObjectType.cpp -lldb/source/Commands/CommandObjectType.h -lldb/source/Commands/CommandObjectVersion.cpp -lldb/source/Commands/CommandObjectVersion.h -lldb/source/Commands/CommandObjectWatchpoint.cpp -lldb/source/Commands/CommandObjectWatchpoint.h -lldb/source/Commands/CommandObjectWatchpointCommand.h -lldb/source/Commands/CommandOptionsProcessLaunch.cpp -lldb/source/Commands/CommandOptionsProcessLaunch.h -lldb/source/Core/AddressRange.cpp -lldb/source/Core/AddressResolver.cpp -lldb/source/Core/AddressResolverFileLine.cpp -lldb/source/Core/Communication.cpp -lldb/source/Core/Declaration.cpp -lldb/source/Core/DumpDataExtractor.cpp -lldb/source/Core/DumpRegisterValue.cpp -lldb/source/Core/EmulateInstruction.cpp -lldb/source/Core/FileLineResolver.cpp -lldb/source/Core/FileSpecList.cpp -lldb/source/Core/FormatEntity.cpp -lldb/source/Core/Highlighter.cpp -lldb/source/Core/IOHandler.cpp -lldb/source/Core/IOHandlerCursesGUI.cpp -lldb/source/Core/ModuleChild.cpp -lldb/source/Core/Opcode.cpp -lldb/source/Core/Progress.cpp -lldb/source/Core/RichManglingContext.cpp -lldb/source/Core/SourceLocationSpec.cpp -lldb/source/Core/StreamAsynchronousIO.cpp -lldb/source/Core/StreamFile.cpp -lldb/source/Core/ValueObjectConstResultCast.cpp -lldb/source/Core/ValueObjectDynamicValue.cpp -lldb/source/Core/ValueObjectList.cpp -lldb/source/Core/ValueObjectMemory.cpp -lldb/source/Core/ValueObjectUpdater.cpp -lldb/source/DataFormatters/CXXFunctionPointer.cpp -lldb/source/DataFormatters/DumpValueObjectOptions.cpp -lldb/source/DataFormatters/ValueObjectPrinter.cpp -lldb/source/Expression/DiagnosticManager.cpp -lldb/source/Expression/Expression.cpp -lldb/source/Expression/ExpressionVariable.cpp -lldb/source/Expression/REPL.cpp -lldb/source/Expression/UtilityFunction.cpp -lldb/source/Host/android/HostInfoAndroid.cpp -lldb/source/Host/android/LibcGlue.cpp -lldb/source/Host/common/FileAction.cpp -lldb/source/Host/common/FileCache.cpp -lldb/source/Host/common/FileSystem.cpp -lldb/source/Host/common/HostNativeThreadBase.cpp -lldb/source/Host/common/HostProcess.cpp -lldb/source/Host/common/HostThread.cpp -lldb/source/Host/common/LockFileBase.cpp -lldb/source/Host/common/LZMA.cpp -lldb/source/Host/common/MonitoringProcessLauncher.cpp -lldb/source/Host/common/NativeRegisterContext.cpp -lldb/source/Host/common/NativeThreadProtocol.cpp -lldb/source/Host/common/NativeWatchpointList.cpp -lldb/source/Host/common/OptionParser.cpp -lldb/source/Host/common/PipeBase.cpp -lldb/source/Host/common/PseudoTerminal.cpp -lldb/source/Host/common/ThreadLauncher.cpp -lldb/source/Host/freebsd/Host.cpp -lldb/source/Host/freebsd/HostInfoFreeBSD.cpp -lldb/source/Host/linux/AbstractSocket.cpp -lldb/source/Host/linux/Host.cpp -lldb/source/Host/linux/HostInfoLinux.cpp -lldb/source/Host/linux/LibcGlue.cpp -lldb/source/Host/linux/Support.cpp -lldb/source/Host/macosx/cfcpp/CFCBundle.cpp -lldb/source/Host/macosx/cfcpp/CFCBundle.h -lldb/source/Host/macosx/cfcpp/CFCData.cpp -lldb/source/Host/macosx/cfcpp/CFCData.h -lldb/source/Host/macosx/cfcpp/CFCMutableArray.cpp -lldb/source/Host/macosx/cfcpp/CFCMutableArray.h -lldb/source/Host/macosx/cfcpp/CFCMutableDictionary.h -lldb/source/Host/macosx/cfcpp/CFCMutableSet.h -lldb/source/Host/macosx/cfcpp/CFCReleaser.h -lldb/source/Host/macosx/cfcpp/CFCString.cpp -lldb/source/Host/macosx/cfcpp/CFCString.h -lldb/source/Host/macosx/cfcpp/CoreFoundationCPP.h -lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h -lldb/source/Host/openbsd/HostInfoOpenBSD.cpp -lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp -lldb/source/Host/posix/FileSystemPosix.cpp -lldb/source/Host/posix/HostInfoPosix.cpp -lldb/source/Host/posix/HostThreadPosix.cpp -lldb/source/Host/posix/LockFilePosix.cpp -lldb/source/Host/posix/PipePosix.cpp -lldb/source/Host/windows/FileSystem.cpp -lldb/source/Host/windows/Host.cpp -lldb/source/Host/windows/HostInfoWindows.cpp -lldb/source/Host/windows/HostThreadWindows.cpp -lldb/source/Host/windows/LockFileWindows.cpp -lldb/source/Host/windows/ProcessLauncherWindows.cpp -lldb/source/Host/windows/ProcessRunLock.cpp -lldb/source/Initialization/SystemInitializer.cpp -lldb/source/Initialization/SystemInitializerCommon.cpp -lldb/source/Initialization/SystemLifetimeManager.cpp -lldb/source/Interpreter/CommandAlias.cpp -lldb/source/Interpreter/CommandHistory.cpp -lldb/source/Interpreter/CommandOptionValidators.cpp -lldb/source/Interpreter/CommandReturnObject.cpp -lldb/source/Interpreter/OptionGroupBoolean.cpp -lldb/source/Interpreter/OptionGroupFile.cpp -lldb/source/Interpreter/OptionGroupString.cpp -lldb/source/Interpreter/OptionGroupUInt64.cpp -lldb/source/Interpreter/OptionValue.cpp -lldb/source/Interpreter/OptionValueArch.cpp -lldb/source/Interpreter/OptionValueArgs.cpp -lldb/source/Interpreter/OptionValueBoolean.cpp -lldb/source/Interpreter/OptionValueChar.cpp -lldb/source/Interpreter/OptionValueFileColonLine.cpp -lldb/source/Interpreter/OptionValueFileSpecList.cpp -lldb/source/Interpreter/OptionValueFormat.cpp -lldb/source/Interpreter/OptionValueFormatEntity.cpp -lldb/source/Interpreter/OptionValuePathMappings.cpp -lldb/source/Interpreter/OptionValueRegex.cpp -lldb/source/Interpreter/OptionValueSInt64.cpp -lldb/source/Interpreter/OptionValueString.cpp -lldb/source/Interpreter/OptionValueUInt64.cpp -lldb/source/Interpreter/OptionValueUUID.cpp -lldb/source/Interpreter/ScriptInterpreter.cpp -lldb/source/Plugins/ABI/AArch64/ABIAArch64.cpp -lldb/source/Plugins/ABI/AArch64/ABIAArch64.h -lldb/source/Plugins/ABI/AArch64/ABIMacOSX_arm64.cpp -lldb/source/Plugins/ABI/ARM/ABIARM.cpp -lldb/source/Plugins/ABI/ARM/ABIARM.h -lldb/source/Plugins/ABI/Mips/ABIMips.cpp -lldb/source/Plugins/ABI/Mips/ABIMips.h -lldb/source/Plugins/ABI/PowerPC/ABIPowerPC.cpp -lldb/source/Plugins/ABI/PowerPC/ABIPowerPC.h -lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp -lldb/source/Plugins/ABI/X86/ABIX86.h -lldb/source/Plugins/ABI/X86/ABIX86_64.h -lldb/source/Plugins/ABI/X86/ABIX86_i386.cpp -lldb/source/Plugins/ABI/X86/ABIX86_i386.h -lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.cpp -lldb/source/Plugins/Architecture/AArch64/ArchitectureAArch64.h -lldb/source/Plugins/Architecture/Arm/ArchitectureArm.h -lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.cpp -lldb/source/Plugins/Architecture/PPC64/ArchitecturePPC64.h -lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.h -lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.cpp -lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/DynamicLoaderHexagonDYLD.h -lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.h -lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderDarwin.h -lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOS.h -lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.h -lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.h -lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.h -lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.h -lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.cpp -lldb/source/Plugins/DynamicLoader/wasm-DYLD/DynamicLoaderWasmDYLD.h -lldb/source/Plugins/DynamicLoader/Windows-DYLD/DynamicLoaderWindowsDYLD.h -lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp -lldb/source/Plugins/ExpressionParser/Clang/ASTStructExtractor.cpp -lldb/source/Plugins/ExpressionParser/Clang/ASTUtils.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.h -lldb/source/Plugins/ExpressionParser/Clang/ClangASTMetadata.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangASTMetadata.h -lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h -lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangDeclVendor.h -lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionHelper.h -lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionVariable.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangExternalASTSourceCallbacks.h -lldb/source/Plugins/ExpressionParser/Clang/ClangFunctionCaller.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangHost.h -lldb/source/Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h -lldb/source/Plugins/ExpressionParser/Clang/ClangPersistentVariables.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangUtil.cpp -lldb/source/Plugins/ExpressionParser/Clang/ClangUtilityFunction.h -lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.cpp -lldb/source/Plugins/ExpressionParser/Clang/CppModuleConfiguration.h -lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.h -lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp -lldb/source/Plugins/ExpressionParser/Clang/ModuleDependencyCollector.h -lldb/source/Plugins/ExpressionParser/Clang/NameSearchContext.h -lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.h -lldb/source/Plugins/Instruction/ARM/EmulationStateARM.h -lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.h -lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp -lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.h -lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.cpp -lldb/source/Plugins/InstrumentationRuntime/ASan/InstrumentationRuntimeASan.h -lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.cpp -lldb/source/Plugins/InstrumentationRuntime/MainThreadChecker/InstrumentationRuntimeMainThreadChecker.h -lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.cpp -lldb/source/Plugins/InstrumentationRuntime/TSan/InstrumentationRuntimeTSan.h -lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.cpp -lldb/source/Plugins/InstrumentationRuntime/UBSan/InstrumentationRuntimeUBSan.h -lldb/source/Plugins/JITLoader/GDB/JITLoaderGDB.h -lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp -lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.h -lldb/source/Plugins/Language/CPlusPlus/BlockPointer.cpp -lldb/source/Plugins/Language/CPlusPlus/BlockPointer.h -lldb/source/Plugins/Language/CPlusPlus/Coroutines.cpp -lldb/source/Plugins/Language/CPlusPlus/Coroutines.h -lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h -lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.h -lldb/source/Plugins/Language/CPlusPlus/CxxStringTypes.h -lldb/source/Plugins/Language/CPlusPlus/Generic.h -lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp -lldb/source/Plugins/Language/CPlusPlus/GenericOptional.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxx.h -lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxAtomic.h -lldb/source/Plugins/Language/CPlusPlus/LibCxxInitializerList.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxList.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxMap.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.cpp -lldb/source/Plugins/Language/CPlusPlus/LibCxxVariant.h -lldb/source/Plugins/Language/CPlusPlus/LibStdcpp.h -lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.cpp -lldb/source/Plugins/Language/CPlusPlus/MSVCUndecoratedNameParser.h -lldb/source/Plugins/Language/ObjC/CF.cpp -lldb/source/Plugins/Language/ObjC/CF.h -lldb/source/Plugins/Language/ObjC/CFBasicHash.h -lldb/source/Plugins/Language/ObjC/Cocoa.h -lldb/source/Plugins/Language/ObjC/CoreMedia.cpp -lldb/source/Plugins/Language/ObjC/CoreMedia.h -lldb/source/Plugins/Language/ObjC/NSDictionary.h -lldb/source/Plugins/Language/ObjC/NSError.cpp -lldb/source/Plugins/Language/ObjC/NSIndexPath.cpp -lldb/source/Plugins/Language/ObjC/NSSet.h -lldb/source/Plugins/Language/ObjC/NSString.cpp -lldb/source/Plugins/Language/ObjC/NSString.h -lldb/source/Plugins/Language/ObjC/ObjCConstants.h -lldb/source/Plugins/Language/ObjC/ObjCLanguage.h -lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.cpp -lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h -lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.cpp -lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptExpressionOpts.h -lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptScriptGroup.h -lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.cpp -lldb/source/Plugins/MemoryHistory/asan/MemoryHistoryASan.h -lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp -lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.h -lldb/source/Plugins/ObjectFile/Breakpad/BreakpadRecords.h -lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.cpp -lldb/source/Plugins/ObjectFile/Breakpad/ObjectFileBreakpad.h -lldb/source/Plugins/ObjectFile/ELF/ELFHeader.cpp -lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.cpp -lldb/source/Plugins/ObjectFile/Minidump/MinidumpFileBuilder.h -lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.cpp -lldb/source/Plugins/ObjectFile/Minidump/ObjectFileMinidump.h -lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.cpp -lldb/source/Plugins/ObjectFile/PDB/ObjectFilePDB.h -lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h -lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.h -lldb/source/Plugins/ObjectFile/PECOFF/WindowsMiniDump.h -lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp -lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.h -lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.cpp -lldb/source/Plugins/OperatingSystem/Python/OperatingSystemPython.h -lldb/source/Plugins/Platform/Android/AdbClient.h -lldb/source/Plugins/Platform/Android/PlatformAndroidRemoteGDBServer.cpp -lldb/source/Plugins/Platform/Android/PlatformAndroidRemoteGDBServer.h -lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h -lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h -lldb/source/Plugins/Platform/Linux/PlatformLinux.h -lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h -lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h -lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h -lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp -lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp -lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h -lldb/source/Plugins/Platform/MacOSX/objcxx/PlatformiOSSimulatorCoreSimulatorSupport.h -lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h -lldb/source/Plugins/Platform/OpenBSD/PlatformOpenBSD.h -lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.cpp -lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h -lldb/source/Plugins/Platform/Windows/PlatformWindows.h -lldb/source/Plugins/Process/elf-core/ProcessElfCore.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_arm64.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_mips64.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_powerpc.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_powerpc.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_ppc64le.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_ppc64le.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_s390x.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_s390x.h -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_x86_64.cpp -lldb/source/Plugins/Process/elf-core/RegisterContextPOSIXCore_x86_64.h -lldb/source/Plugins/Process/elf-core/RegisterUtilities.cpp -lldb/source/Plugins/Process/elf-core/RegisterUtilities.h -lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp -lldb/source/Plugins/Process/elf-core/ThreadElfCore.h -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD.cpp -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD.h -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.cpp -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm.h -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.cpp -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_mips64.h -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.cpp -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_powerpc.h -lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_x86_64.cpp -lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp -lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.h -lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp -lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.cpp -lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.h -lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_i386.h -lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.cpp -lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.h -lldb/source/Plugins/Process/FreeBSDKernel/ThreadFreeBSDKernel.cpp -lldb/source/Plugins/Process/FreeBSDKernel/ThreadFreeBSDKernel.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationHistory.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterFallback.cpp -lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterFallback.h -lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h -lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp -lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h -lldb/source/Plugins/Process/Linux/IntelPTManager.cpp -lldb/source/Plugins/Process/Linux/IntelPTManager.h -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux.cpp -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.h -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.h -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_ppc64le.h -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_s390x.h -lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_x86_64.h -lldb/source/Plugins/Process/Linux/NativeThreadLinux.h -lldb/source/Plugins/Process/Linux/Procfs.h -lldb/source/Plugins/Process/Linux/SingleStepCheck.h -lldb/source/Plugins/Process/mach-core/ProcessMachCore.h -lldb/source/Plugins/Process/mach-core/ThreadMachCore.cpp -lldb/source/Plugins/Process/mach-core/ThreadMachCore.h -lldb/source/Plugins/Process/MacOSX-Kernel/CommunicationKDP.h -lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h -lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm.h -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_arm64.h -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_i386.h -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/RegisterContextKDP_x86_64.h -lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.cpp -lldb/source/Plugins/Process/MacOSX-Kernel/ThreadKDP.h -lldb/source/Plugins/Process/minidump/MinidumpParser.h -lldb/source/Plugins/Process/minidump/MinidumpTypes.cpp -lldb/source/Plugins/Process/minidump/NtStructures.h -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.h -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.h -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_x86_32.cpp -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_x86_32.h -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_x86_64.cpp -lldb/source/Plugins/Process/minidump/RegisterContextMinidump_x86_64.h -lldb/source/Plugins/Process/minidump/ThreadMinidump.cpp -lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp -lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.cpp -lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD.h -lldb/source/Plugins/Process/NetBSD/NativeRegisterContextNetBSD_x86_64.cpp -lldb/source/Plugins/Process/POSIX/CrashReason.h -lldb/source/Plugins/Process/POSIX/NativeProcessELF.cpp -lldb/source/Plugins/Process/POSIX/NativeProcessELF.h -lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp -lldb/source/Plugins/Process/POSIX/ProcessMessage.h -lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp -lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h -lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp -lldb/source/Plugins/Process/scripted/ScriptedProcess.h -lldb/source/Plugins/Process/scripted/ScriptedThread.cpp -lldb/source/Plugins/Process/scripted/ScriptedThread.h -lldb/source/Plugins/Process/Utility/ARMDefines.h -lldb/source/Plugins/Process/Utility/ARMUtils.h -lldb/source/Plugins/Process/Utility/AuxVector.cpp -lldb/source/Plugins/Process/Utility/AuxVector.h -lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp -lldb/source/Plugins/Process/Utility/FreeBSDSignals.h -lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp -lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h -lldb/source/Plugins/Process/Utility/HistoryThread.cpp -lldb/source/Plugins/Process/Utility/HistoryThread.h -lldb/source/Plugins/Process/Utility/HistoryUnwind.cpp -lldb/source/Plugins/Process/Utility/HistoryUnwind.h -lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.cpp -lldb/source/Plugins/Process/Utility/InferiorCallPOSIX.h -lldb/source/Plugins/Process/Utility/InstructionUtils.h -lldb/source/Plugins/Process/Utility/LinuxProcMaps.cpp -lldb/source/Plugins/Process/Utility/LinuxProcMaps.h -lldb/source/Plugins/Process/Utility/LinuxSignals.cpp -lldb/source/Plugins/Process/Utility/LinuxSignals.h -lldb/source/Plugins/Process/Utility/lldb-mips-freebsd-register-enums.h -lldb/source/Plugins/Process/Utility/lldb-ppc64-register-enums.h -lldb/source/Plugins/Process/Utility/lldb-ppc64le-register-enums.h -lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp -lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h -lldb/source/Plugins/Process/Utility/MipsLinuxSignals.cpp -lldb/source/Plugins/Process/Utility/MipsLinuxSignals.h -lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.cpp -lldb/source/Plugins/Process/Utility/NativeProcessSoftwareSingleStep.h -lldb/source/Plugins/Process/Utility/NativeRegisterContextDBReg_arm64.cpp -lldb/source/Plugins/Process/Utility/NativeRegisterContextDBReg_arm64.h -lldb/source/Plugins/Process/Utility/NativeRegisterContextDBReg_x86.cpp -lldb/source/Plugins/Process/Utility/NativeRegisterContextDBReg_x86.h -lldb/source/Plugins/Process/Utility/NativeRegisterContextRegisterInfo.cpp -lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp -lldb/source/Plugins/Process/Utility/NetBSDSignals.h -lldb/source/Plugins/Process/Utility/RegisterContextDarwinConstants.h -lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.h -lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.h -lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextDummy.cpp -lldb/source/Plugins/Process/Utility/RegisterContextDummy.h -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.cpp -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_mips64.h -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.cpp -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_powerpc.h -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextFreeBSD_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextHistory.cpp -lldb/source/Plugins/Process/Utility/RegisterContextHistory.h -lldb/source/Plugins/Process/Utility/RegisterContextLinux_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextLinux_s390x.cpp -lldb/source/Plugins/Process/Utility/RegisterContextLinux_s390x.h -lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextLinux_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextMach_arm.h -lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.cpp -lldb/source/Plugins/Process/Utility/RegisterContextMach_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextMach_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextMemory.cpp -lldb/source/Plugins/Process/Utility/RegisterContextMemory.h -lldb/source/Plugins/Process/Utility/RegisterContextNetBSD_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextNetBSD_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.cpp -lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_x86_64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextOpenBSD_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.cpp -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm.h -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_arm64.h -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.cpp -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_powerpc.h -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_ppc64le.cpp -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.cpp -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_s390x.h -lldb/source/Plugins/Process/Utility/RegisterContextPOSIX_x86.h -lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp -lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.h -lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp -lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.h -lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp -lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.h -lldb/source/Plugins/Process/Utility/RegisterContext_mips.h -lldb/source/Plugins/Process/Utility/RegisterContext_powerpc.h -lldb/source/Plugins/Process/Utility/RegisterContext_s390x.h -lldb/source/Plugins/Process/Utility/RegisterContext_x86.cpp -lldb/source/Plugins/Process/Utility/RegisterInfoAndSetInterface.h -lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h -lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_ppc64le.h -lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp -lldb/source/Plugins/Process/Utility/StopInfoMachException.h -lldb/source/Plugins/Process/Utility/ThreadMemory.cpp -lldb/source/Plugins/Process/Utility/ThreadMemory.h -lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp -lldb/source/Plugins/Process/Windows/Common/LocalDebugDelegate.cpp -lldb/source/Plugins/Process/Windows/Common/NativeProcessWindows.h -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.h -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.h -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.h -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.h -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp -lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.h -lldb/source/Plugins/Process/Windows/Common/NativeThreadWindows.cpp -lldb/source/Plugins/Process/Windows/Common/NativeThreadWindows.h -lldb/source/Plugins/Process/Windows/Common/NtStructures.h -lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp -lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.h -lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.h -lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.cpp -lldb/source/Plugins/Process/Windows/Common/TargetThreadWindows.h -lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.h -lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.h -lldb/source/Plugins/REPL/Clang/ClangREPL.h -lldb/source/Plugins/ScriptInterpreter/Lua/Lua.cpp -lldb/source/Plugins/ScriptInterpreter/Lua/Lua.h -lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.cpp -lldb/source/Plugins/ScriptInterpreter/Lua/ScriptInterpreterLua.h -lldb/source/Plugins/ScriptInterpreter/Lua/SWIGLuaBridge.h -lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.cpp -lldb/source/Plugins/ScriptInterpreter/None/ScriptInterpreterNone.h -lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h -lldb/source/Plugins/ScriptInterpreter/Python/PythonReadline.cpp -lldb/source/Plugins/ScriptInterpreter/Python/PythonReadline.h -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.cpp -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedPythonInterface.h -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp -lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h -lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.h -lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.cpp -lldb/source/Plugins/ScriptInterpreter/Python/SWIGPythonBridge.h -lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.cpp -lldb/source/Plugins/SymbolFile/DWARF/AppleDWARFIndex.h -lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h -lldb/source/Plugins/SymbolFile/DWARF/DIERef.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFAbbreviationDeclaration.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFAbbreviationDeclaration.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFBaseDIE.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFCompileUnit.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFContext.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDataExtractor.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAbbrev.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugArangeSet.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugMacro.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugRanges.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDeclContext.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFDefines.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFDIE.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h -lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.cpp -lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h -lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp -lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h -lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h -lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h -lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp -lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp -lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h -lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDwo.cpp -lldb/source/Plugins/SymbolFile/DWARF/UniqueDWARFASTType.cpp -lldb/source/Plugins/SymbolFile/NativePDB/CodeViewRegisterMapping.cpp -lldb/source/Plugins/SymbolFile/NativePDB/CodeViewRegisterMapping.h -lldb/source/Plugins/SymbolFile/NativePDB/CompileUnitIndex.h -lldb/source/Plugins/SymbolFile/NativePDB/DWARFLocationExpression.h -lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.h -lldb/source/Plugins/SymbolFile/NativePDB/PdbIndex.cpp -lldb/source/Plugins/SymbolFile/NativePDB/PdbIndex.h -lldb/source/Plugins/SymbolFile/NativePDB/PdbSymUid.cpp -lldb/source/Plugins/SymbolFile/NativePDB/PdbSymUid.h -lldb/source/Plugins/SymbolFile/NativePDB/PdbUtil.cpp -lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp -lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.h -lldb/source/Plugins/SymbolFile/PDB/PDBLocationToDWARFExpression.cpp -lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.h -lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp -lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.h -lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp -lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.h -lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.cpp -lldb/source/Plugins/SymbolVendor/MacOSX/SymbolVendorMacOSX.h -lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp -lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.h -lldb/source/Plugins/SystemRuntime/MacOSX/SystemRuntimeMacOSX.h -lldb/source/Plugins/Trace/common/ThreadPostMortemTrace.cpp -lldb/source/Plugins/Trace/common/ThreadPostMortemTrace.h -lldb/source/Plugins/Trace/common/TraceJSONStructs.cpp -lldb/source/Plugins/Trace/common/TraceJSONStructs.h -lldb/source/Plugins/Trace/common/TraceSessionFileParser.cpp -lldb/source/Plugins/Trace/common/TraceSessionSaver.cpp -lldb/source/Plugins/Trace/common/TraceSessionSaver.h -lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.cpp -lldb/source/Plugins/Trace/intel-pt/CommandObjectTraceStartIntelPT.h -lldb/source/Plugins/Trace/intel-pt/DecodedThread.cpp -lldb/source/Plugins/Trace/intel-pt/DecodedThread.h -lldb/source/Plugins/Trace/intel-pt/forward-declarations.h -lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.cpp -lldb/source/Plugins/Trace/intel-pt/IntelPTDecoder.h -lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.cpp -lldb/source/Plugins/Trace/intel-pt/TraceCursorIntelPT.h -lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.cpp -lldb/source/Plugins/Trace/intel-pt/TraceIntelPT.h -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTConstants.h -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.cpp -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTJSONStructs.h -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionFileParser.cpp -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.cpp -lldb/source/Plugins/Trace/intel-pt/TraceIntelPTSessionSaver.h -lldb/source/Plugins/TraceExporter/common/TraceHTR.cpp -lldb/source/Plugins/TraceExporter/common/TraceHTR.h -lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp -lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.h -lldb/source/Plugins/TraceExporter/ctf/TraceExporterCTF.cpp -lldb/source/Plugins/UnwindAssembly/InstEmulation/UnwindAssemblyInstEmulation.h -lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.cpp -lldb/source/Plugins/UnwindAssembly/x86/UnwindAssembly-x86.h -lldb/source/Symbol/ArmUnwindInfo.cpp -lldb/source/Symbol/Block.cpp -lldb/source/Symbol/CompilerDecl.cpp -lldb/source/Symbol/CompilerDeclContext.cpp -lldb/source/Symbol/DebugMacros.cpp -lldb/source/Symbol/DeclVendor.cpp -lldb/source/Symbol/LineEntry.cpp -lldb/source/Symbol/LocateSymbolFile.cpp -lldb/source/Symbol/PostfixExpression.cpp -lldb/source/Symbol/SymbolContext.cpp -lldb/source/Symbol/SymbolFile.cpp -lldb/source/Symbol/SymbolVendor.cpp -lldb/source/Symbol/TypeList.cpp -lldb/source/Symbol/TypeMap.cpp -lldb/source/Symbol/TypeSystem.cpp -lldb/source/Symbol/UnwindTable.cpp -lldb/source/Symbol/Variable.cpp -lldb/source/Symbol/VariableList.cpp -lldb/source/Target/AssertFrameRecognizer.cpp -lldb/source/Target/InstrumentationRuntime.cpp -lldb/source/Target/InstrumentationRuntimeStopInfo.cpp -lldb/source/Target/JITLoader.cpp -lldb/source/Target/Language.cpp -lldb/source/Target/MemoryHistory.cpp -lldb/source/Target/MemoryRegionInfo.cpp -lldb/source/Target/MemoryTagMap.cpp -lldb/source/Target/ModuleCache.cpp -lldb/source/Target/OperatingSystem.cpp -lldb/source/Target/ProcessTrace.cpp -lldb/source/Target/Queue.cpp -lldb/source/Target/RegisterContext.cpp -lldb/source/Target/RegisterNumber.cpp -lldb/source/Target/SectionLoadHistory.cpp -lldb/source/Target/SectionLoadList.cpp -lldb/source/Target/StackID.cpp -lldb/source/Target/SystemRuntime.cpp -lldb/source/Target/ThreadCollection.cpp -lldb/source/Target/ThreadPlanCallFunctionUsingABI.cpp -lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp -lldb/source/Target/ThreadPlanCallUserExpression.cpp -lldb/source/Target/ThreadPlanRunToAddress.cpp -lldb/source/Target/ThreadPlanShouldStopHere.cpp -lldb/source/Target/ThreadPlanStepInRange.cpp -lldb/source/Target/ThreadPlanStepThrough.cpp -lldb/source/Target/ThreadPlanStepUntil.cpp -lldb/source/Target/ThreadSpec.cpp -lldb/source/Target/Trace.cpp -lldb/source/Target/TraceCursor.cpp -lldb/source/Target/TraceExporter.cpp -lldb/source/Target/TraceInstructionDumper.cpp -lldb/source/Target/UnwindAssembly.cpp -lldb/source/Target/UnwindLLDB.cpp -lldb/source/Utility/Args.cpp -lldb/source/Utility/ARM64_DWARF_Registers.h -lldb/source/Utility/ARM64_ehframe_Registers.h -lldb/source/Utility/ARM_DWARF_Registers.h -lldb/source/Utility/ARM_ehframe_Registers.h -lldb/source/Utility/Baton.cpp -lldb/source/Utility/Broadcaster.cpp -lldb/source/Utility/Connection.cpp -lldb/source/Utility/DataBufferLLVM.cpp -lldb/source/Utility/DataEncoder.cpp -lldb/source/Utility/DataExtractor.cpp -lldb/source/Utility/Environment.cpp -lldb/source/Utility/GDBRemote.cpp -lldb/source/Utility/Instrumentation.cpp -lldb/source/Utility/IOObject.cpp -lldb/source/Utility/Listener.cpp -lldb/source/Utility/LLDBAssert.cpp -lldb/source/Utility/LLDBLog.cpp -lldb/source/Utility/NameMatches.cpp -lldb/source/Utility/PPC64LE_DWARF_Registers.h -lldb/source/Utility/PPC64_DWARF_Registers.h -lldb/source/Utility/RegularExpression.cpp -lldb/source/Utility/Reproducer.cpp -lldb/source/Utility/ReproducerProvider.cpp -lldb/source/Utility/State.cpp -lldb/source/Utility/Status.cpp -lldb/source/Utility/Stream.cpp -lldb/source/Utility/StreamCallback.cpp -lldb/source/Utility/StreamString.cpp -lldb/source/Utility/StringExtractor.cpp -lldb/source/Utility/StringExtractorGDBRemote.cpp -lldb/source/Utility/StringLexer.cpp -lldb/source/Utility/StringList.cpp -lldb/source/Utility/StructuredData.cpp -lldb/source/Utility/TildeExpressionResolver.cpp -lldb/source/Utility/Timer.cpp -lldb/source/Utility/TraceGDBRemotePackets.cpp -lldb/source/Utility/TraceIntelPTGDBRemotePackets.cpp -lldb/source/Utility/UnimplementedError.cpp -lldb/source/Utility/UriParser.cpp -lldb/source/Utility/UserID.cpp -lldb/source/Utility/UserIDResolver.cpp -lldb/source/Utility/UuidCompatibility.h -lldb/source/Utility/VASprintf.cpp -lldb/source/Utility/VMRange.cpp -lldb/source/Utility/XcodeSDK.cpp -lldb/source/Version/Version.cpp -lldb/tools/argdumper/argdumper.cpp -lldb/tools/darwin-debug/darwin-debug.cpp -lldb/tools/debugserver/source/ARM_DWARF_Registers.h -lldb/tools/debugserver/source/ARM_ehframe_Registers.h -lldb/tools/debugserver/source/DNB.h -lldb/tools/debugserver/source/DNBArch.cpp -lldb/tools/debugserver/source/DNBArch.h -lldb/tools/debugserver/source/DNBBreakpoint.cpp -lldb/tools/debugserver/source/DNBBreakpoint.h -lldb/tools/debugserver/source/DNBDataRef.cpp -lldb/tools/debugserver/source/DNBDataRef.h -lldb/tools/debugserver/source/DNBDefs.h -lldb/tools/debugserver/source/DNBError.cpp -lldb/tools/debugserver/source/DNBError.h -lldb/tools/debugserver/source/DNBLog.cpp -lldb/tools/debugserver/source/DNBLog.h -lldb/tools/debugserver/source/DNBRegisterInfo.cpp -lldb/tools/debugserver/source/DNBRegisterInfo.h -lldb/tools/debugserver/source/DNBRuntimeAction.h -lldb/tools/debugserver/source/DNBThreadResumeActions.cpp -lldb/tools/debugserver/source/DNBThreadResumeActions.h -lldb/tools/debugserver/source/DNBTimer.h -lldb/tools/debugserver/source/JSON.cpp -lldb/tools/debugserver/source/JSON.h -lldb/tools/debugserver/source/JSONGenerator.h -lldb/tools/debugserver/source/libdebugserver.h -lldb/tools/debugserver/source/PseudoTerminal.cpp -lldb/tools/debugserver/source/PseudoTerminal.h -lldb/tools/debugserver/source/PThreadCondition.h -lldb/tools/debugserver/source/PThreadEvent.cpp -lldb/tools/debugserver/source/PThreadEvent.h -lldb/tools/debugserver/source/PThreadMutex.h -lldb/tools/debugserver/source/RNBDefs.h -lldb/tools/debugserver/source/RNBServices.h -lldb/tools/debugserver/source/RNBSocket.h -lldb/tools/debugserver/source/StdStringExtractor.cpp -lldb/tools/debugserver/source/StringConvert.cpp -lldb/tools/debugserver/source/StringConvert.h -lldb/tools/debugserver/source/SysSignal.cpp -lldb/tools/debugserver/source/SysSignal.h -lldb/tools/debugserver/source/TTYState.cpp -lldb/tools/debugserver/source/TTYState.h -lldb/tools/debugserver/source/MacOSX/CFBundle.cpp -lldb/tools/debugserver/source/MacOSX/CFBundle.h -lldb/tools/debugserver/source/MacOSX/CFString.cpp -lldb/tools/debugserver/source/MacOSX/CFString.h -lldb/tools/debugserver/source/MacOSX/CFUtils.h -lldb/tools/debugserver/source/MacOSX/Genealogy.cpp -lldb/tools/debugserver/source/MacOSX/Genealogy.h -lldb/tools/debugserver/source/MacOSX/GenealogySPI.h -lldb/tools/debugserver/source/MacOSX/MachException.h -lldb/tools/debugserver/source/MacOSX/MachThread.cpp -lldb/tools/debugserver/source/MacOSX/MachThread.h -lldb/tools/debugserver/source/MacOSX/MachThreadList.h -lldb/tools/debugserver/source/MacOSX/MachVMMemory.h -lldb/tools/debugserver/source/MacOSX/MachVMRegion.cpp -lldb/tools/debugserver/source/MacOSX/MachVMRegion.h -lldb/tools/debugserver/source/MacOSX/OsLogger.h -lldb/tools/debugserver/source/MacOSX/arm/DNBArchImpl.h -lldb/tools/debugserver/source/MacOSX/arm64/DNBArchImplARM64.h -lldb/tools/driver/Driver.cpp -lldb/tools/driver/Driver.h -lldb/tools/driver/Platform.cpp -lldb/tools/driver/Platform.h -lldb/tools/intel-features/cli-wrapper.cpp -lldb/tools/intel-features/intel-mpx/cli-wrapper-mpxtable.h -lldb/tools/lldb-instr/Instrument.cpp -lldb/tools/lldb-server/Acceptor.h -lldb/tools/lldb-server/LLDBServerUtilities.cpp -lldb/tools/lldb-server/SystemInitializerLLGS.cpp -lldb/tools/lldb-server/SystemInitializerLLGS.h -lldb/tools/lldb-test/SystemInitializerTest.cpp -lldb/tools/lldb-test/SystemInitializerTest.h -lldb/tools/lldb-vscode/BreakpointBase.cpp -lldb/tools/lldb-vscode/BreakpointBase.h -lldb/tools/lldb-vscode/ExceptionBreakpoint.cpp -lldb/tools/lldb-vscode/ExceptionBreakpoint.h -lldb/tools/lldb-vscode/FifoFiles.cpp -lldb/tools/lldb-vscode/FifoFiles.h -lldb/tools/lldb-vscode/FunctionBreakpoint.cpp -lldb/tools/lldb-vscode/FunctionBreakpoint.h -lldb/tools/lldb-vscode/IOStream.cpp -lldb/tools/lldb-vscode/IOStream.h -lldb/tools/lldb-vscode/JSONUtils.cpp -lldb/tools/lldb-vscode/JSONUtils.h -lldb/tools/lldb-vscode/lldb-vscode.cpp -lldb/tools/lldb-vscode/LLDBUtils.cpp -lldb/tools/lldb-vscode/LLDBUtils.h -lldb/tools/lldb-vscode/OutputRedirector.cpp -lldb/tools/lldb-vscode/OutputRedirector.h -lldb/tools/lldb-vscode/RunInTerminal.cpp -lldb/tools/lldb-vscode/RunInTerminal.h -lldb/tools/lldb-vscode/SourceBreakpoint.cpp -lldb/tools/lldb-vscode/SourceBreakpoint.h -lldb/tools/lldb-vscode/SourceReference.h -lldb/tools/lldb-vscode/VSCode.h -lldb/tools/lldb-vscode/VSCodeForward.h -lldb/unittests/gtest_common.h -lldb/unittests/API/SBCommandInterpreterTest.cpp -lldb/unittests/API/SBStructuredDataTest.cpp -lldb/unittests/Breakpoint/BreakpointIDTest.cpp -lldb/unittests/Core/CommunicationTest.cpp -lldb/unittests/Core/DumpDataExtractorTest.cpp -lldb/unittests/Core/FormatEntityTest.cpp -lldb/unittests/Core/MangledTest.cpp -lldb/unittests/Core/ModuleSpecTest.cpp -lldb/unittests/Core/RichManglingContextTest.cpp -lldb/unittests/Core/SourceLocationSpecTest.cpp -lldb/unittests/Core/SourceManagerTest.cpp -lldb/unittests/Core/StreamCallbackTest.cpp -lldb/unittests/DataFormatter/FormatManagerTests.cpp -lldb/unittests/DataFormatter/FormattersContainerTest.cpp -lldb/unittests/DataFormatter/StringPrinterTests.cpp -lldb/unittests/debugserver/JSONTest.cpp -lldb/unittests/debugserver/RNBSocketTest.cpp -lldb/unittests/Editline/EditlineTest.cpp -lldb/unittests/Expression/ClangExpressionDeclMapTest.cpp -lldb/unittests/Expression/CppModuleConfigurationTest.cpp -lldb/unittests/Expression/DiagnosticManagerTest.cpp -lldb/unittests/Host/ConnectionFileDescriptorTest.cpp -lldb/unittests/Host/FileActionTest.cpp -lldb/unittests/Host/FileSystemTest.cpp -lldb/unittests/Host/FileTest.cpp -lldb/unittests/Host/HostTest.cpp -lldb/unittests/Host/NativeProcessProtocolTest.cpp -lldb/unittests/Host/PipeTest.cpp -lldb/unittests/Host/ProcessLaunchInfoTest.cpp -lldb/unittests/Host/SocketAddressTest.cpp -lldb/unittests/Host/SocketTestUtilities.h -lldb/unittests/Host/ThreadLauncherTest.cpp -lldb/unittests/Host/linux/HostTest.cpp -lldb/unittests/Host/linux/SupportTest.cpp -lldb/unittests/Interpreter/TestOptionValueFileColonLine.cpp -lldb/unittests/Interpreter/TestRegexCommand.cpp -lldb/unittests/Language/CLanguages/CLanguagesTest.cpp -lldb/unittests/Language/Highlighting/HighlighterTest.cpp -lldb/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp -lldb/unittests/Platform/PlatformDarwinTest.cpp -lldb/unittests/Platform/PlatformSiginfoTest.cpp -lldb/unittests/Process/ProcessEventDataTest.cpp -lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerLLGSTest.cpp -lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp -lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationTest.cpp -lldb/unittests/Process/gdb-remote/GDBRemoteTestUtils.cpp -lldb/unittests/Process/gdb-remote/GDBRemoteTestUtils.h -lldb/unittests/Process/gdb-remote/PortMapTest.cpp -lldb/unittests/Process/minidump/Inputs/linux-x86_64.cpp -lldb/unittests/Process/POSIX/NativeProcessELFTest.cpp -lldb/unittests/Process/Utility/LinuxProcMapsTest.cpp -lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp -lldb/unittests/Process/Utility/RegisterContextFreeBSDTest.cpp -lldb/unittests/Process/Utility/RegisterContextTest.cpp -lldb/unittests/ScriptInterpreter/Lua/LuaTests.cpp -lldb/unittests/ScriptInterpreter/Lua/ScriptInterpreterTests.cpp -lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp -lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.h -lldb/unittests/Signals/UnixSignalsTest.cpp -lldb/unittests/Symbol/LocateSymbolFileTest.cpp -lldb/unittests/Symbol/MangledTest.cpp -lldb/unittests/Symbol/SymbolTest.cpp -lldb/unittests/Symbol/SymtabTest.cpp -lldb/unittests/Symbol/TestClangASTImporter.cpp -lldb/unittests/Symbol/TestDWARFCallFrameInfo.cpp -lldb/unittests/Symbol/TestTypeSystem.cpp -lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp -lldb/unittests/SymbolFile/DWARF/DWARFDIETest.cpp -lldb/unittests/SymbolFile/DWARF/DWARFIndexCachingTest.cpp -lldb/unittests/SymbolFile/DWARF/DWARFUnitTest.cpp -lldb/unittests/SymbolFile/DWARF/Inputs/test-dwarf.cpp -lldb/unittests/SymbolFile/NativePDB/PdbFPOProgramToDWARFExpressionTests.cpp -lldb/unittests/SymbolFile/PDB/Inputs/test-pdb-alt.cpp -lldb/unittests/SymbolFile/PDB/Inputs/test-pdb-nested.h -lldb/unittests/SymbolFile/PDB/Inputs/test-pdb-types.cpp -lldb/unittests/SymbolFile/PDB/Inputs/test-pdb.cpp -lldb/unittests/SymbolFile/PDB/Inputs/test-pdb.h -lldb/unittests/Target/DynamicRegisterInfoTest.cpp -lldb/unittests/Target/ExecutionContextTest.cpp -lldb/unittests/Target/FindFileTest.cpp -lldb/unittests/Target/MemoryTagMapTest.cpp -lldb/unittests/Target/RemoteAwarePlatformTest.cpp -lldb/unittests/Target/StackFrameRecognizerTest.cpp -lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp -lldb/unittests/TestingSupport/MockTildeExpressionResolver.h -lldb/unittests/TestingSupport/SubsystemRAII.h -lldb/unittests/TestingSupport/TestUtilities.cpp -lldb/unittests/TestingSupport/Host/NativeProcessTestUtils.h -lldb/unittests/TestingSupport/Symbol/ClangTestUtils.h -lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.cpp -lldb/unittests/TestingSupport/Symbol/YAMLModuleTester.h -lldb/unittests/Thread/ThreadTest.cpp -lldb/unittests/UnwindAssembly/PPC64/TestPPC64InstEmulation.cpp -lldb/unittests/Utility/AnsiTerminalTest.cpp -lldb/unittests/Utility/ArgsTest.cpp -lldb/unittests/Utility/BroadcasterTest.cpp -lldb/unittests/Utility/CompletionRequestTest.cpp -lldb/unittests/Utility/ConstStringTest.cpp -lldb/unittests/Utility/DataExtractorTest.cpp -lldb/unittests/Utility/EnvironmentTest.cpp -lldb/unittests/Utility/EventTest.cpp -lldb/unittests/Utility/ListenerTest.cpp -lldb/unittests/Utility/NameMatchesTest.cpp -lldb/unittests/Utility/OptionsWithRawTest.cpp -lldb/unittests/Utility/PredicateTest.cpp -lldb/unittests/Utility/ProcessInfoTest.cpp -lldb/unittests/Utility/RangeMapTest.cpp -lldb/unittests/Utility/RangeTest.cpp -lldb/unittests/Utility/RegisterValueTest.cpp -lldb/unittests/Utility/RegularExpressionTest.cpp -lldb/unittests/Utility/ReproducerTest.cpp -lldb/unittests/Utility/SharedClusterTest.cpp -lldb/unittests/Utility/StatusTest.cpp -lldb/unittests/Utility/StringExtractorGDBRemoteTest.cpp -lldb/unittests/Utility/StringExtractorTest.cpp -lldb/unittests/Utility/StringLexerTest.cpp -lldb/unittests/Utility/StringListTest.cpp -lldb/unittests/Utility/StructuredDataTest.cpp -lldb/unittests/Utility/SubsystemRAIITest.cpp -lldb/unittests/Utility/TildeExpressionResolverTest.cpp -lldb/unittests/Utility/TimeoutTest.cpp -lldb/unittests/Utility/UriParserTest.cpp -lldb/unittests/Utility/UserIDResolverTest.cpp -lldb/unittests/Utility/UUIDTest.cpp -lldb/unittests/Utility/VMRangeTest.cpp -lldb/utils/TableGen/LLDBOptionDefEmitter.cpp -lldb/utils/TableGen/LLDBPropertyDefEmitter.cpp -lldb/utils/TableGen/LLDBTableGen.cpp -lldb/utils/TableGen/LLDBTableGenBackends.h -lldb/utils/TableGen/LLDBTableGenUtils.cpp -lldb/utils/TableGen/LLDBTableGenUtils.h -llvm/bindings/ocaml/llvm/llvm_ocaml.h -llvm/cmake/dummy.cpp -llvm/cmake/unwind.h -llvm/examples/Bye/Bye.cpp -llvm/examples/HowToUseLLJIT/HowToUseLLJIT.cpp -llvm/examples/IRTransforms/InitializePasses.cpp -llvm/examples/IRTransforms/InitializePasses.h -llvm/examples/IRTransforms/SimplifyCFG.cpp -llvm/examples/IRTransforms/SimplifyCFG.h -llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h -llvm/examples/Kaleidoscope/BuildingAJIT/Chapter2/KaleidoscopeJIT.h -llvm/examples/Kaleidoscope/BuildingAJIT/Chapter3/KaleidoscopeJIT.h -llvm/examples/Kaleidoscope/Chapter2/toy.cpp -llvm/examples/Kaleidoscope/include/KaleidoscopeJIT.h -llvm/examples/OrcV2Examples/ExampleModules.h -llvm/examples/OrcV2Examples/LLJITDumpObjects/LLJITDumpObjects.cpp -llvm/examples/OrcV2Examples/LLJITWithCustomObjectLinkingLayer/LLJITWithCustomObjectLinkingLayer.cpp -llvm/examples/OrcV2Examples/LLJITWithExecutorProcessControl/LLJITWithExecutorProcessControl.cpp -llvm/examples/OrcV2Examples/LLJITWithGDBRegistrationListener/LLJITWithGDBRegistrationListener.cpp -llvm/examples/OrcV2Examples/LLJITWithInitializers/LLJITWithInitializers.cpp -llvm/examples/OrcV2Examples/LLJITWithLazyReexports/LLJITWithLazyReexports.cpp -llvm/examples/OrcV2Examples/LLJITWithObjectCache/LLJITWithObjectCache.cpp -llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp -llvm/examples/OrcV2Examples/LLJITWithRemoteDebugging/RemoteJITUtils.h -llvm/include/llvm/PassRegistry.h -llvm/include/llvm/PassSupport.h -llvm/include/llvm/ADT/APInt.h -llvm/include/llvm/ADT/Bitfields.h -llvm/include/llvm/ADT/BitmaskEnum.h -llvm/include/llvm/ADT/BreadthFirstIterator.h -llvm/include/llvm/ADT/CachedHashString.h -llvm/include/llvm/ADT/CombinationGenerator.h -llvm/include/llvm/ADT/DAGDeltaAlgorithm.h -llvm/include/llvm/ADT/EnumeratedArray.h -llvm/include/llvm/ADT/EpochTracker.h -llvm/include/llvm/ADT/GenericCycleInfo.h -llvm/include/llvm/ADT/GenericSSAContext.h -llvm/include/llvm/ADT/ilist_base.h -llvm/include/llvm/ADT/ilist_iterator.h -llvm/include/llvm/ADT/ilist_node.h -llvm/include/llvm/ADT/ilist_node_base.h -llvm/include/llvm/ADT/IntrusiveRefCntPtr.h -llvm/include/llvm/ADT/PointerEmbeddedInt.h -llvm/include/llvm/ADT/ScopeExit.h -llvm/include/llvm/ADT/Sequence.h -llvm/include/llvm/ADT/simple_ilist.h -llvm/include/llvm/ADT/Statistic.h -llvm/include/llvm/ADT/STLArrayExtras.h -llvm/include/llvm/ADT/STLForwardCompat.h -llvm/include/llvm/ADT/StringSet.h -llvm/include/llvm/ADT/TypeSwitch.h -llvm/include/llvm/Analysis/BlockFrequencyInfo.h -llvm/include/llvm/Analysis/ConstraintSystem.h -llvm/include/llvm/Analysis/CostModel.h -llvm/include/llvm/Analysis/CycleAnalysis.h -llvm/include/llvm/Analysis/DDGPrinter.h -llvm/include/llvm/Analysis/Delinearization.h -llvm/include/llvm/Analysis/DependenceGraphBuilder.h -llvm/include/llvm/Analysis/DivergenceAnalysis.h -llvm/include/llvm/Analysis/DomTreeUpdater.h -llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h -llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h -llvm/include/llvm/Analysis/IndirectCallVisitor.h -llvm/include/llvm/Analysis/InlineAdvisor.h -llvm/include/llvm/Analysis/InlineCost.h -llvm/include/llvm/Analysis/InlineModelFeatureMaps.h -llvm/include/llvm/Analysis/InlineOrder.h -llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h -llvm/include/llvm/Analysis/InstCount.h -llvm/include/llvm/Analysis/InstructionSimplify.h -llvm/include/llvm/Analysis/InstSimplifyFolder.h -llvm/include/llvm/Analysis/IteratedDominanceFrontier.h -llvm/include/llvm/Analysis/Lint.h -llvm/include/llvm/Analysis/LoopCacheAnalysis.h -llvm/include/llvm/Analysis/LoopNestAnalysis.h -llvm/include/llvm/Analysis/MemDerefPrinter.h -llvm/include/llvm/Analysis/MLInlineAdvisor.h -llvm/include/llvm/Analysis/MLModelRunner.h -llvm/include/llvm/Analysis/ModelUnderTrainingRunner.h -llvm/include/llvm/Analysis/ModuleDebugInfoPrinter.h -llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h -llvm/include/llvm/Analysis/NoInferenceModelRunner.h -llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h -llvm/include/llvm/Analysis/ObjCARCInstKind.h -llvm/include/llvm/Analysis/ObjCARCUtil.h -llvm/include/llvm/Analysis/OverflowInstAnalysis.h -llvm/include/llvm/Analysis/PhiValues.h -llvm/include/llvm/Analysis/ReplayInlineAdvisor.h -llvm/include/llvm/Analysis/ScalarEvolutionDivision.h -llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h -llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h -llvm/include/llvm/Analysis/ScopedNoAliasAA.h -llvm/include/llvm/Analysis/StackLifetime.h -llvm/include/llvm/Analysis/StackSafetyAnalysis.h -llvm/include/llvm/Analysis/SyncDependenceAnalysis.h -llvm/include/llvm/Analysis/SyntheticCountsUtils.h -llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h -llvm/include/llvm/Analysis/TypeMetadataUtils.h -llvm/include/llvm/Analysis/ValueLatticeUtils.h -llvm/include/llvm/Analysis/Utils/TFUtils.h -llvm/include/llvm/AsmParser/LLToken.h -llvm/include/llvm/AsmParser/SlotMapping.h -llvm/include/llvm/BinaryFormat/COFF.h -llvm/include/llvm/BinaryFormat/Magic.h -llvm/include/llvm/BinaryFormat/Minidump.h -llvm/include/llvm/BinaryFormat/MsgPackDocument.h -llvm/include/llvm/BinaryFormat/MsgPackReader.h -llvm/include/llvm/BinaryFormat/MsgPackWriter.h -llvm/include/llvm/BinaryFormat/Swift.h -llvm/include/llvm/BinaryFormat/WasmTraits.h -llvm/include/llvm/Bitcode/BitcodeAnalyzer.h -llvm/include/llvm/Bitcode/BitcodeCommon.h -llvm/include/llvm/CodeGen/AsmPrinter.h -llvm/include/llvm/CodeGen/AsmPrinterHandler.h -llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h -llvm/include/llvm/CodeGen/CodeGenCommonISel.h -llvm/include/llvm/CodeGen/CodeGenPassBuilder.h -llvm/include/llvm/CodeGen/CommandFlags.h -llvm/include/llvm/CodeGen/CSEConfigBase.h -llvm/include/llvm/CodeGen/DebugHandlerBase.h -llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h -llvm/include/llvm/CodeGen/ExecutionDomainFix.h -llvm/include/llvm/CodeGen/ExpandVectorPredication.h -llvm/include/llvm/CodeGen/GCMetadataPrinter.h -llvm/include/llvm/CodeGen/IndirectThunks.h -llvm/include/llvm/CodeGen/ISDOpcodes.h -llvm/include/llvm/CodeGen/LiveIntervalCalc.h -llvm/include/llvm/CodeGen/LiveRangeCalc.h -llvm/include/llvm/CodeGen/LiveRegMatrix.h -llvm/include/llvm/CodeGen/LiveStacks.h -llvm/include/llvm/CodeGen/LoopTraversal.h -llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h -llvm/include/llvm/CodeGen/MachineCombinerPattern.h -llvm/include/llvm/CodeGen/MachineCycleAnalysis.h -llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h -llvm/include/llvm/CodeGen/MachineLoopUtils.h -llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h -llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h -llvm/include/llvm/CodeGen/MachinePassManager.h -llvm/include/llvm/CodeGen/MachineRegionInfo.h -llvm/include/llvm/CodeGen/MachineSSAContext.h -llvm/include/llvm/CodeGen/MachineStableHash.h -llvm/include/llvm/CodeGen/MIRFormatter.h -llvm/include/llvm/CodeGen/MIRFSDiscriminator.h -llvm/include/llvm/CodeGen/MIRSampleProfile.h -llvm/include/llvm/CodeGen/MultiHazardRecognizer.h -llvm/include/llvm/CodeGen/NonRelocatableStringpool.h -llvm/include/llvm/CodeGen/ParallelCG.h -llvm/include/llvm/CodeGen/PBQPRAConstraint.h -llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h -llvm/include/llvm/CodeGen/RegisterBank.h -llvm/include/llvm/CodeGen/RegisterBankInfo.h -llvm/include/llvm/CodeGen/RegisterClassInfo.h -llvm/include/llvm/CodeGen/ReplaceWithVeclib.h -llvm/include/llvm/CodeGen/ScheduleDAGMutation.h -llvm/include/llvm/CodeGen/Spiller.h -llvm/include/llvm/CodeGen/StableHashing.h -llvm/include/llvm/CodeGen/TargetOpcodes.h -llvm/include/llvm/CodeGen/TileShapeInfo.h -llvm/include/llvm/CodeGen/UnreachableBlockElim.h -llvm/include/llvm/CodeGen/VLIWMachineScheduler.h -llvm/include/llvm/CodeGen/WasmEHFuncInfo.h -llvm/include/llvm/CodeGen/WinEHFuncInfo.h -llvm/include/llvm/CodeGen/GlobalISel/Combiner.h -llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h -llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h -llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h -llvm/include/llvm/CodeGen/GlobalISel/InlineAsmLowering.h -llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h -llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h -llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h -llvm/include/llvm/CodeGen/MIRParser/MIRParser.h -llvm/include/llvm/CodeGen/PBQP/CostAllocator.h -llvm/include/llvm/DebugInfo/DIContext.h -llvm/include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h -llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h -llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h -llvm/include/llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h -llvm/include/llvm/DebugInfo/CodeView/CVRecord.h -llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h -llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h -llvm/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugCrossExSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugCrossImpSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugLinesSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugStringTableSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugSubsection.h -llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h -llvm/include/llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h -llvm/include/llvm/DebugInfo/CodeView/DebugSymbolRVASubsection.h -llvm/include/llvm/DebugInfo/CodeView/EnumTables.h -llvm/include/llvm/DebugInfo/CodeView/Formatters.h -llvm/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h -llvm/include/llvm/DebugInfo/CodeView/GUID.h -llvm/include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h -llvm/include/llvm/DebugInfo/CodeView/Line.h -llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h -llvm/include/llvm/DebugInfo/CodeView/RecordName.h -llvm/include/llvm/DebugInfo/CodeView/SimpleTypeSerializer.h -llvm/include/llvm/DebugInfo/CodeView/StringsAndChecksums.h -llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h -llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h -llvm/include/llvm/DebugInfo/CodeView/SymbolRecordHelpers.h -llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h -llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h -llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h -llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h -llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h -llvm/include/llvm/DebugInfo/CodeView/TypeDumpVisitor.h -llvm/include/llvm/DebugInfo/CodeView/TypeRecordHelpers.h -llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h -llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h -llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h -llvm/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRnglists.h -llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h -llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h -llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h -llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h -llvm/include/llvm/DebugInfo/DWARF/DWARFSection.h -llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h -llvm/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h -llvm/include/llvm/DebugInfo/GSYM/FileEntry.h -llvm/include/llvm/DebugInfo/GSYM/Header.h -llvm/include/llvm/DebugInfo/GSYM/LookupResult.h -llvm/include/llvm/DebugInfo/GSYM/StringTable.h -llvm/include/llvm/DebugInfo/MSF/IMSFFile.h -llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h -llvm/include/llvm/DebugInfo/MSF/MSFCommon.h -llvm/include/llvm/DebugInfo/MSF/MSFError.h -llvm/include/llvm/DebugInfo/PDB/GenericError.h -llvm/include/llvm/DebugInfo/PDB/IPDBDataStream.h -llvm/include/llvm/DebugInfo/PDB/IPDBFrameData.h -llvm/include/llvm/DebugInfo/PDB/IPDBInjectedSource.h -llvm/include/llvm/DebugInfo/PDB/PDB.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolData.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolExe.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolFunc.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugEnd.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolFuncDebugStart.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolLabel.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolThunk.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeArray.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBaseClass.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeBuiltin.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeCustom.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeDimension.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFriend.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionArg.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeFunctionSig.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeManaged.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypePointer.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTable.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolUnknown.h -llvm/include/llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h -llvm/include/llvm/DebugInfo/PDB/UDTLayout.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAFrameData.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIASectionContrib.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h -llvm/include/llvm/DebugInfo/PDB/DIA/DIAUtils.h -llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptor.h -llvm/include/llvm/DebugInfo/PDB/Native/DbiModuleList.h -llvm/include/llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h -llvm/include/llvm/DebugInfo/PDB/Native/Hash.h -llvm/include/llvm/DebugInfo/PDB/Native/ISectionContribVisitor.h -llvm/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h -llvm/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeCompilandSymbol.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumGlobals.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumLineNumbers.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumSymbols.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeEnumTypes.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeExeSymbol.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeFunctionSymbol.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeInlineSiteSymbol.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeLineNumber.h -llvm/include/llvm/DebugInfo/PDB/Native/NativePublicSymbol.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeSession.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeSourceFile.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeSymbolEnumerator.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeArray.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeBuiltin.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeEnum.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeFunctionSig.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypePointer.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeTypedef.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeUDT.h -llvm/include/llvm/DebugInfo/PDB/Native/NativeTypeVTShape.h -llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h -llvm/include/llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h -llvm/include/llvm/DebugInfo/PDB/Native/RawConstants.h -llvm/include/llvm/DebugInfo/PDB/Native/RawError.h -llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h -llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h -llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h -llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h -llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h -llvm/include/llvm/Debuginfod/Debuginfod.h -llvm/include/llvm/Debuginfod/DIFetcher.h -llvm/include/llvm/Debuginfod/HTTPClient.h -llvm/include/llvm/Demangle/Demangle.h -llvm/include/llvm/Demangle/StringViewExtras.h -llvm/include/llvm/Demangle/Utility.h -llvm/include/llvm/DWARFLinker/DWARFLinker.h -llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h -llvm/include/llvm/DWARFLinker/DWARFLinkerDeclContext.h -llvm/include/llvm/DWARFLinker/DWARFStreamer.h -llvm/include/llvm/DWP/DWP.h -llvm/include/llvm/DWP/DWPError.h -llvm/include/llvm/DWP/DWPStringPool.h -llvm/include/llvm/ExecutionEngine/GenericValue.h -llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h -llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h -llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h -llvm/include/llvm/ExecutionEngine/JITLink/ELF.h -llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h -llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h -llvm/include/llvm/ExecutionEngine/JITLink/ELF_x86_64.h -llvm/include/llvm/ExecutionEngine/JITLink/JITLinkDylib.h -llvm/include/llvm/ExecutionEngine/JITLink/MachO.h -llvm/include/llvm/ExecutionEngine/JITLink/MachO_arm64.h -llvm/include/llvm/ExecutionEngine/JITLink/MachO_x86_64.h -llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h -llvm/include/llvm/ExecutionEngine/JITLink/riscv.h -llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h -llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h -llvm/include/llvm/ExecutionEngine/Orc/CompileUtils.h -llvm/include/llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h -llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h -llvm/include/llvm/ExecutionEngine/Orc/DebugUtils.h -llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h -llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h -llvm/include/llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h -llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h -llvm/include/llvm/ExecutionEngine/Orc/EPCGenericDylibManager.h -llvm/include/llvm/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.h -llvm/include/llvm/ExecutionEngine/Orc/EPCGenericMemoryAccess.h -llvm/include/llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h -llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h -llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h -llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h -llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h -llvm/include/llvm/ExecutionEngine/Orc/Layer.h -llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h -llvm/include/llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h -llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h -llvm/include/llvm/ExecutionEngine/Orc/Mangling.h -llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h -llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h -llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h -llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h -llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h -llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h -llvm/include/llvm/ExecutionEngine/Orc/Speculation.h -llvm/include/llvm/ExecutionEngine/Orc/ThreadSafeModule.h -llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h -llvm/include/llvm/ExecutionEngine/Orc/Shared/OrcRTBridge.h -llvm/include/llvm/ExecutionEngine/Orc/Shared/SimplePackedSerialization.h -llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/ExecutorBootstrapService.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h -llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h -llvm/include/llvm/FileCheck/FileCheck.h -llvm/include/llvm/Frontend/OpenMP/OMPAssume.h -llvm/include/llvm/Frontend/OpenMP/OMPConstants.h -llvm/include/llvm/Frontend/OpenMP/OMPContext.h -llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h -llvm/include/llvm/InterfaceStub/ELFObjHandler.h -llvm/include/llvm/InterfaceStub/IFSHandler.h -llvm/include/llvm/InterfaceStub/IFSStub.h -llvm/include/llvm/IR/Assumptions.h -llvm/include/llvm/IR/BuiltinGCs.h -llvm/include/llvm/IR/Comdat.h -llvm/include/llvm/IR/Constants.h -llvm/include/llvm/IR/DebugInfo.h -llvm/include/llvm/IR/DebugInfoMetadata.h -llvm/include/llvm/IR/DiagnosticHandler.h -llvm/include/llvm/IR/DiagnosticPrinter.h -llvm/include/llvm/IR/EHPersonalities.h -llvm/include/llvm/IR/GlobalIFunc.h -llvm/include/llvm/IR/GlobalObject.h -llvm/include/llvm/IR/GVMaterializer.h -llvm/include/llvm/IR/IRPrintingPasses.h -llvm/include/llvm/IR/LLVMRemarkStreamer.h -llvm/include/llvm/IR/MatrixBuilder.h -llvm/include/llvm/IR/ModuleSlotTracker.h -llvm/include/llvm/IR/OptBisect.h -llvm/include/llvm/IR/PassInstrumentation.h -llvm/include/llvm/IR/PassManagerImpl.h -llvm/include/llvm/IR/PassTimingInfo.h -llvm/include/llvm/IR/PredIteratorCache.h -llvm/include/llvm/IR/PrintPasses.h -llvm/include/llvm/IR/ProfileSummary.h -llvm/include/llvm/IR/PseudoProbe.h -llvm/include/llvm/IR/ReplaceConstant.h -llvm/include/llvm/IR/SSAContext.h -llvm/include/llvm/IR/StructuralHash.h -llvm/include/llvm/IR/TrackingMDRef.h -llvm/include/llvm/IR/UseListOrder.h -llvm/include/llvm/MC/MCAsmInfoCOFF.h -llvm/include/llvm/MC/MCAsmInfoDarwin.h -llvm/include/llvm/MC/MCAsmInfoELF.h -llvm/include/llvm/MC/MCAsmInfoGOFF.h -llvm/include/llvm/MC/MCAsmInfoWasm.h -llvm/include/llvm/MC/MCAsmInfoXCOFF.h -llvm/include/llvm/MC/MCCodeView.h -llvm/include/llvm/MC/MCContext.h -llvm/include/llvm/MC/MCFixedLenDisassembler.h -llvm/include/llvm/MC/MCLabel.h -llvm/include/llvm/MC/MCObjectWriter.h -llvm/include/llvm/MC/MCPseudoProbe.h -llvm/include/llvm/MC/MCSectionCOFF.h -llvm/include/llvm/MC/MCSectionGOFF.h -llvm/include/llvm/MC/MCSectionWasm.h -llvm/include/llvm/MC/MCSectionXCOFF.h -llvm/include/llvm/MC/MCSymbolGOFF.h -llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h -llvm/include/llvm/MC/MCWasmObjectWriter.h -llvm/include/llvm/MC/MCWasmStreamer.h -llvm/include/llvm/MC/MCWinCOFFStreamer.h -llvm/include/llvm/MC/MCXCOFFObjectWriter.h -llvm/include/llvm/MC/MCXCOFFStreamer.h -llvm/include/llvm/MC/MCDisassembler/MCRelocationInfo.h -llvm/include/llvm/MC/MCParser/AsmCond.h -llvm/include/llvm/MC/MCParser/AsmLexer.h -llvm/include/llvm/MC/MCParser/MCAsmParserUtils.h -llvm/include/llvm/MCA/CodeEmitter.h -llvm/include/llvm/MCA/Context.h -llvm/include/llvm/MCA/CustomBehaviour.h -llvm/include/llvm/MCA/HWEventListener.h -llvm/include/llvm/MCA/InstrBuilder.h -llvm/include/llvm/MCA/Instruction.h -llvm/include/llvm/MCA/Pipeline.h -llvm/include/llvm/MCA/SourceMgr.h -llvm/include/llvm/MCA/Support.h -llvm/include/llvm/MCA/View.h -llvm/include/llvm/MCA/HardwareUnits/HardwareUnit.h -llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h -llvm/include/llvm/MCA/HardwareUnits/ResourceManager.h -llvm/include/llvm/MCA/HardwareUnits/RetireControlUnit.h -llvm/include/llvm/MCA/Stages/DispatchStage.h -llvm/include/llvm/MCA/Stages/EntryStage.h -llvm/include/llvm/MCA/Stages/ExecuteStage.h -llvm/include/llvm/MCA/Stages/InOrderIssueStage.h -llvm/include/llvm/MCA/Stages/InstructionTables.h -llvm/include/llvm/MCA/Stages/MicroOpQueueStage.h -llvm/include/llvm/MCA/Stages/RetireStage.h -llvm/include/llvm/MCA/Stages/Stage.h -llvm/include/llvm/ObjCopy/CommonConfig.h -llvm/include/llvm/ObjCopy/MultiFormatConfig.h -llvm/include/llvm/ObjCopy/ObjCopy.h -llvm/include/llvm/ObjCopy/COFF/COFFConfig.h -llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h -llvm/include/llvm/ObjCopy/ELF/ELFConfig.h -llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h -llvm/include/llvm/ObjCopy/MachO/MachOConfig.h -llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h -llvm/include/llvm/ObjCopy/wasm/WasmConfig.h -llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h -llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h -llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h -llvm/include/llvm/Object/Archive.h -llvm/include/llvm/Object/COFFModuleDefinition.h -llvm/include/llvm/Object/Decompressor.h -llvm/include/llvm/Object/FaultMapParser.h -llvm/include/llvm/Object/MachOUniversalWriter.h -llvm/include/llvm/Object/Minidump.h -llvm/include/llvm/Object/ModuleSymbolTable.h -llvm/include/llvm/Object/RelocationResolver.h -llvm/include/llvm/Object/TapiFile.h -llvm/include/llvm/Object/TapiUniversal.h -llvm/include/llvm/Object/WindowsResource.h -llvm/include/llvm/Object/XCOFFObjectFile.h -llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h -llvm/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h -llvm/include/llvm/ObjectYAML/CodeViewYAMLTypeHashing.h -llvm/include/llvm/ObjectYAML/CodeViewYAMLTypes.h -llvm/include/llvm/ObjectYAML/DWARFEmitter.h -llvm/include/llvm/ObjectYAML/DWARFYAML.h -llvm/include/llvm/ObjectYAML/MachOYAML.h -llvm/include/llvm/ObjectYAML/MinidumpYAML.h -llvm/include/llvm/ObjectYAML/ObjectYAML.h -llvm/include/llvm/ObjectYAML/WasmYAML.h -llvm/include/llvm/ObjectYAML/YAML.h -llvm/include/llvm/ObjectYAML/yaml2obj.h -llvm/include/llvm/Option/OptSpecifier.h -llvm/include/llvm/Passes/OptimizationLevel.h -llvm/include/llvm/Passes/StandardInstrumentations.h -llvm/include/llvm/ProfileData/GCOV.h -llvm/include/llvm/ProfileData/InstrProfCorrelator.h -llvm/include/llvm/ProfileData/InstrProfWriter.h -llvm/include/llvm/ProfileData/ItaniumManglingCanonicalizer.h -llvm/include/llvm/ProfileData/ProfileCommon.h -llvm/include/llvm/ProfileData/RawMemProfReader.h -llvm/include/llvm/ProfileData/SymbolRemappingReader.h -llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h -llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h -llvm/include/llvm/Remarks/BitstreamRemarkContainer.h -llvm/include/llvm/Remarks/BitstreamRemarkParser.h -llvm/include/llvm/Remarks/BitstreamRemarkSerializer.h -llvm/include/llvm/Remarks/HotnessThresholdParser.h -llvm/include/llvm/Remarks/RemarkFormat.h -llvm/include/llvm/Remarks/RemarkLinker.h -llvm/include/llvm/Remarks/RemarkParser.h -llvm/include/llvm/Remarks/RemarkSerializer.h -llvm/include/llvm/Remarks/RemarkStreamer.h -llvm/include/llvm/Remarks/RemarkStringTable.h -llvm/include/llvm/Remarks/YAMLRemarkSerializer.h -llvm/include/llvm/Support/Alignment.h -llvm/include/llvm/Support/AlignOf.h -llvm/include/llvm/Support/AllocatorBase.h -llvm/include/llvm/Support/AutoConvert.h -llvm/include/llvm/Support/Base64.h -llvm/include/llvm/Support/BCD.h -llvm/include/llvm/Support/BinaryByteStream.h -llvm/include/llvm/Support/BinaryItemStream.h -llvm/include/llvm/Support/BinaryStream.h -llvm/include/llvm/Support/BinaryStreamError.h -llvm/include/llvm/Support/BinaryStreamReader.h -llvm/include/llvm/Support/BinaryStreamRef.h -llvm/include/llvm/Support/BinaryStreamWriter.h -llvm/include/llvm/Support/BuryPointer.h -llvm/include/llvm/Support/CachePruning.h -llvm/include/llvm/Support/Caching.h -llvm/include/llvm/Support/CFGDiff.h -llvm/include/llvm/Support/CFGUpdate.h -llvm/include/llvm/Support/CodeGenCoverage.h -llvm/include/llvm/Support/CRC.h -llvm/include/llvm/Support/CSKYAttributeParser.h -llvm/include/llvm/Support/CSKYAttributes.h -llvm/include/llvm/TargetParser/CSKYTargetParser.h -llvm/include/llvm/Support/DataTypes.h -llvm/include/llvm/Support/DebugCounter.h -llvm/include/llvm/Support/Discriminator.h -llvm/include/llvm/Support/DivisionByConstantInfo.h -llvm/include/llvm/Support/DJB.h -llvm/include/llvm/Support/ELFAttributeParser.h -llvm/include/llvm/Support/ELFAttributes.h -llvm/include/llvm/Support/ExitCodes.h -llvm/include/llvm/Support/FileCollector.h -llvm/include/llvm/Support/FileOutputBuffer.h -llvm/include/llvm/Support/GenericIteratedDominanceFrontier.h -llvm/include/llvm/Support/HashBuilder.h -llvm/include/llvm/Support/InitLLVM.h -llvm/include/llvm/Support/InstructionCost.h -llvm/include/llvm/Support/MD5.h -llvm/include/llvm/Support/MemAlloc.h -llvm/include/llvm/Support/MemoryBufferRef.h -llvm/include/llvm/Support/MSP430AttributeParser.h -llvm/include/llvm/Support/MSP430Attributes.h -llvm/include/llvm/Support/MSVCErrorWorkarounds.h -llvm/include/llvm/Support/Parallel.h -llvm/include/llvm/Support/PGOOptions.h -llvm/include/llvm/Support/PointerLikeTypeTraits.h -llvm/include/llvm/Support/RISCVAttributeParser.h -llvm/include/llvm/Support/RISCVAttributes.h -llvm/include/llvm/TargetParser/RISCVISAInfo.h -llvm/include/llvm/Support/RWMutex.h -llvm/include/llvm/Support/ScopedPrinter.h -llvm/include/llvm/Support/SHA256.h -llvm/include/llvm/Support/Signposts.h -llvm/include/llvm/Support/SmallVectorMemoryBuffer.h -llvm/include/llvm/Support/SMLoc.h -llvm/include/llvm/Support/SMTAPI.h -llvm/include/llvm/Support/SourceMgr.h -llvm/include/llvm/Support/SuffixTree.h -llvm/include/llvm/Support/SystemUtils.h -llvm/include/llvm/TargetParser/TargetParser.h -llvm/include/llvm/Support/TrailingObjects.h -llvm/include/llvm/Support/Unicode.h -llvm/include/llvm/Support/UnicodeCharRanges.h -llvm/include/llvm/Support/VersionTuple.h -llvm/include/llvm/Support/WindowsError.h -llvm/include/llvm/Support/WithColor.h -llvm/include/llvm/Support/FileSystem/UniqueID.h -llvm/include/llvm/Support/Solaris/sys/regset.h -llvm/include/llvm/TableGen/DirectiveEmitter.h -llvm/include/llvm/TableGen/Parser.h -llvm/include/llvm/TableGen/StringToOffsetTable.h -llvm/include/llvm/Target/CGPassBuilderOption.h -llvm/include/llvm/Target/CodeGenCWrappers.h -llvm/include/llvm/Testing/Annotations/Annotations.h -llvm/include/llvm/Testing/Support/SupportHelpers.h -llvm/include/llvm/TextAPI/Architecture.h -llvm/include/llvm/TextAPI/ArchitectureSet.h -llvm/include/llvm/TextAPI/InterfaceFile.h -llvm/include/llvm/TextAPI/PackedVersion.h -llvm/include/llvm/TextAPI/Platform.h -llvm/include/llvm/TextAPI/Symbol.h -llvm/include/llvm/TextAPI/Target.h -llvm/include/llvm/TextAPI/TextAPIReader.h -llvm/include/llvm/TextAPI/TextAPIWriter.h -llvm/include/llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h -llvm/include/llvm/Transforms/CFGuard.h -llvm/include/llvm/Transforms/Utils.h -llvm/include/llvm/Transforms/Coroutines/CoroCleanup.h -llvm/include/llvm/Transforms/Coroutines/CoroEarly.h -llvm/include/llvm/Transforms/Coroutines/CoroElide.h -llvm/include/llvm/Transforms/Coroutines/CoroSplit.h -llvm/include/llvm/Transforms/InstCombine/InstCombiner.h -llvm/include/llvm/Transforms/Instrumentation/AddressSanitizer.h -llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h -llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerOptions.h -llvm/include/llvm/Transforms/Instrumentation/CGProfile.h -llvm/include/llvm/Transforms/Instrumentation/DataFlowSanitizer.h -llvm/include/llvm/Transforms/Instrumentation/HWAddressSanitizer.h -llvm/include/llvm/Transforms/Instrumentation/InstrOrderFile.h -llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h -llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h -llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h -llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h -llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h -llvm/include/llvm/Transforms/IPO/Attributor.h -llvm/include/llvm/Transforms/IPO/BlockExtractor.h -llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h -llvm/include/llvm/Transforms/IPO/ConstantMerge.h -llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h -llvm/include/llvm/Transforms/IPO/ElimAvailExtern.h -llvm/include/llvm/Transforms/IPO/FunctionAttrs.h -llvm/include/llvm/Transforms/IPO/FunctionImport.h -llvm/include/llvm/Transforms/IPO/GlobalOpt.h -llvm/include/llvm/Transforms/IPO/GlobalSplit.h -llvm/include/llvm/Transforms/IPO/Inliner.h -llvm/include/llvm/Transforms/IPO/Internalize.h -llvm/include/llvm/Transforms/IPO/LoopExtractor.h -llvm/include/llvm/Transforms/IPO/MergeFunctions.h -llvm/include/llvm/Transforms/IPO/ModuleInliner.h -llvm/include/llvm/Transforms/IPO/OpenMPOpt.h -llvm/include/llvm/Transforms/IPO/PartialInlining.h -llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h -llvm/include/llvm/Transforms/IPO/SampleContextTracker.h -llvm/include/llvm/Transforms/IPO/SampleProfile.h -llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h -llvm/include/llvm/Transforms/IPO/SCCP.h -llvm/include/llvm/Transforms/IPO/StripSymbols.h -llvm/include/llvm/Transforms/IPO/ThinLTOBitcodeWriter.h -llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h -llvm/include/llvm/Transforms/Scalar/ADCE.h -llvm/include/llvm/Transforms/Scalar/AnnotationRemarks.h -llvm/include/llvm/Transforms/Scalar/CallSiteSplitting.h -llvm/include/llvm/Transforms/Scalar/ConstraintElimination.h -llvm/include/llvm/Transforms/Scalar/CorrelatedValuePropagation.h -llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h -llvm/include/llvm/Transforms/Scalar/DFAJumpThreading.h -llvm/include/llvm/Transforms/Scalar/EarlyCSE.h -llvm/include/llvm/Transforms/Scalar/FlattenCFG.h -llvm/include/llvm/Transforms/Scalar/GVNExpression.h -llvm/include/llvm/Transforms/Scalar/InductiveRangeCheckElimination.h -llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h -llvm/include/llvm/Transforms/Scalar/InferAddressSpaces.h -llvm/include/llvm/Transforms/Scalar/InstSimplifyPass.h -llvm/include/llvm/Transforms/Scalar/JumpThreading.h -llvm/include/llvm/Transforms/Scalar/LICM.h -llvm/include/llvm/Transforms/Scalar/LoopBoundSplit.h -llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h -llvm/include/llvm/Transforms/Scalar/LoopDeletion.h -llvm/include/llvm/Transforms/Scalar/LoopDistribute.h -llvm/include/llvm/Transforms/Scalar/LoopFlatten.h -llvm/include/llvm/Transforms/Scalar/LoopFuse.h -llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h -llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h -llvm/include/llvm/Transforms/Scalar/LoopInterchange.h -llvm/include/llvm/Transforms/Scalar/LoopLoadElimination.h -llvm/include/llvm/Transforms/Scalar/LoopPredication.h -llvm/include/llvm/Transforms/Scalar/LoopReroll.h -llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h -llvm/include/llvm/Transforms/Scalar/LoopStrengthReduce.h -llvm/include/llvm/Transforms/Scalar/LoopUnrollAndJamPass.h -llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h -llvm/include/llvm/Transforms/Scalar/LoopVersioningLICM.h -llvm/include/llvm/Transforms/Scalar/LowerMatrixIntrinsics.h -llvm/include/llvm/Transforms/Scalar/MakeGuardsExplicit.h -llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h -llvm/include/llvm/Transforms/Scalar/NaryReassociate.h -llvm/include/llvm/Transforms/Scalar/Reassociate.h -llvm/include/llvm/Transforms/Scalar/Reg2Mem.h -llvm/include/llvm/Transforms/Scalar/RewriteStatepointsForGC.h -llvm/include/llvm/Transforms/Scalar/ScalarizeMaskedMemIntrin.h -llvm/include/llvm/Transforms/Scalar/SCCP.h -llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h -llvm/include/llvm/Transforms/Scalar/SimpleLoopUnswitch.h -llvm/include/llvm/Transforms/Scalar/StraightLineStrengthReduce.h -llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h -llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h -llvm/include/llvm/Transforms/Utils/AddDiscriminators.h -llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h -llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h -llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h -llvm/include/llvm/Transforms/Utils/BypassSlowDivision.h -llvm/include/llvm/Transforms/Utils/CallGraphUpdater.h -llvm/include/llvm/Transforms/Utils/CallPromotionUtils.h -llvm/include/llvm/Transforms/Utils/CanonicalizeAliases.h -llvm/include/llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h -llvm/include/llvm/Transforms/Utils/CodeLayout.h -llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h -llvm/include/llvm/Transforms/Utils/Debugify.h -llvm/include/llvm/Transforms/Utils/EntryExitInstrumenter.h -llvm/include/llvm/Transforms/Utils/FixIrreducible.h -llvm/include/llvm/Transforms/Utils/GlobalStatus.h -llvm/include/llvm/Transforms/Utils/HelloWorld.h -llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h -llvm/include/llvm/Transforms/Utils/InstructionNamer.h -llvm/include/llvm/Transforms/Utils/InstructionWorklist.h -llvm/include/llvm/Transforms/Utils/LCSSA.h -llvm/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h -llvm/include/llvm/Transforms/Utils/LoopPeel.h -llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h -llvm/include/llvm/Transforms/Utils/LoopSimplify.h -llvm/include/llvm/Transforms/Utils/LowerSwitch.h -llvm/include/llvm/Transforms/Utils/MatrixUtils.h -llvm/include/llvm/Transforms/Utils/Mem2Reg.h -llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h -llvm/include/llvm/Transforms/Utils/MetaRenamer.h -llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h -llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h -llvm/include/llvm/Transforms/Utils/SampleProfileInference.h -llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h -llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h -llvm/include/llvm/Transforms/Utils/SCCPSolver.h -llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h -llvm/include/llvm/Transforms/Utils/SplitModule.h -llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h -llvm/include/llvm/Transforms/Utils/StripGCRelocates.h -llvm/include/llvm/Transforms/Utils/StripNonLineTableDebugInfo.h -llvm/include/llvm/Transforms/Utils/SymbolRewriter.h -llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h -llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h -llvm/include/llvm/Transforms/Utils/ValueMapper.h -llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h -llvm/include/llvm/WindowsDriver/MSVCSetupApi.h -llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h -llvm/include/llvm/WindowsResource/ResourceScriptToken.h -llvm/include/llvm/XRay/BlockIndexer.h -llvm/include/llvm/XRay/BlockPrinter.h -llvm/include/llvm/XRay/BlockVerifier.h -llvm/include/llvm/XRay/FDRRecordConsumer.h -llvm/include/llvm/XRay/FDRRecordProducer.h -llvm/include/llvm/XRay/FDRRecords.h -llvm/include/llvm/XRay/FDRTraceExpander.h -llvm/include/llvm/XRay/FileHeaderReader.h -llvm/include/llvm/XRay/InstrumentationMap.h -llvm/include/llvm/XRay/Profile.h -llvm/include/llvm/XRay/RecordPrinter.h -llvm/include/llvm/XRay/Trace.h -llvm/include/llvm/XRay/YAMLXRayRecord.h -llvm/include/llvm-c/BitReader.h -llvm/include/llvm-c/BitWriter.h -llvm/include/llvm-c/Comdat.h -llvm/include/llvm-c/Error.h -llvm/include/llvm-c/ErrorHandling.h -llvm/include/llvm-c/ExternC.h -llvm/include/llvm-c/IRReader.h -llvm/include/llvm-c/LLJIT.h -llvm/include/llvm-c/OrcEE.h -llvm/include/llvm-c/Remarks.h -llvm/include/llvm-c/Types.h -llvm/include/llvm-c/Transforms/PassBuilder.h -llvm/lib/Analysis/CodeMetrics.cpp -llvm/lib/Analysis/CycleAnalysis.cpp -llvm/lib/Analysis/DDGPrinter.cpp -llvm/lib/Analysis/Delinearization.cpp -llvm/lib/Analysis/DependenceGraphBuilder.cpp -llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp -llvm/lib/Analysis/DivergenceAnalysis.cpp -llvm/lib/Analysis/DomTreeUpdater.cpp -llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp -llvm/lib/Analysis/ImportedFunctionsInliningStatistics.cpp -llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp -llvm/lib/Analysis/InlineAdvisor.cpp -llvm/lib/Analysis/InlineCost.cpp -llvm/lib/Analysis/InstCount.cpp -llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp -llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp -llvm/lib/Analysis/LoopNestAnalysis.cpp -llvm/lib/Analysis/LoopUnrollAnalyzer.cpp -llvm/lib/Analysis/MLInlineAdvisor.cpp -llvm/lib/Analysis/ModelUnderTrainingRunner.cpp -llvm/lib/Analysis/NoInferenceModelRunner.cpp -llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp -llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp -llvm/lib/Analysis/ObjCARCInstKind.cpp -llvm/lib/Analysis/OptimizationRemarkEmitter.cpp -llvm/lib/Analysis/OverflowInstAnalysis.cpp -llvm/lib/Analysis/ReplayInlineAdvisor.cpp -llvm/lib/Analysis/ScalarEvolutionDivision.cpp -llvm/lib/Analysis/ScalarEvolutionNormalization.cpp -llvm/lib/Analysis/ScopedNoAliasAA.cpp -llvm/lib/Analysis/SyntheticCountsUtils.cpp -llvm/lib/Analysis/TargetTransformInfo.cpp -llvm/lib/Analysis/TFUtils.cpp -llvm/lib/Analysis/TypeMetadataUtils.cpp -llvm/lib/Analysis/ValueLattice.cpp -llvm/lib/Analysis/ValueLatticeUtils.cpp -llvm/lib/Analysis/VFABIDemangling.cpp -llvm/lib/AsmParser/Parser.cpp -llvm/lib/BinaryFormat/COFF.cpp -llvm/lib/BinaryFormat/ELF.cpp -llvm/lib/BinaryFormat/MachO.cpp -llvm/lib/BinaryFormat/Magic.cpp -llvm/lib/BinaryFormat/Minidump.cpp -llvm/lib/BinaryFormat/MsgPackDocument.cpp -llvm/lib/BinaryFormat/MsgPackReader.cpp -llvm/lib/BinaryFormat/MsgPackWriter.cpp -llvm/lib/BinaryFormat/Wasm.cpp -llvm/lib/BinaryFormat/XCOFF.cpp -llvm/lib/Bitcode/Reader/MetadataLoader.cpp -llvm/lib/Bitcode/Reader/ValueList.cpp -llvm/lib/CodeGen/AllocationOrder.cpp -llvm/lib/CodeGen/AllocationOrder.h -llvm/lib/CodeGen/CFGuardLongjmp.cpp -llvm/lib/CodeGen/CodeGen.cpp -llvm/lib/CodeGen/CodeGenPassBuilder.cpp -llvm/lib/CodeGen/DwarfEHPrepare.cpp -llvm/lib/CodeGen/EHContGuardCatchret.cpp -llvm/lib/CodeGen/ExecutionDomainFix.cpp -llvm/lib/CodeGen/ExpandVectorPredication.cpp -llvm/lib/CodeGen/FaultMaps.cpp -llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp -llvm/lib/CodeGen/GCMetadataPrinter.cpp -llvm/lib/CodeGen/IndirectBrExpandPass.cpp -llvm/lib/CodeGen/JMCInstrumenter.cpp -llvm/lib/CodeGen/LiveDebugVariables.h -llvm/lib/CodeGen/LiveIntervalCalc.cpp -llvm/lib/CodeGen/LiveRangeShrink.cpp -llvm/lib/CodeGen/LiveRegUnits.cpp -llvm/lib/CodeGen/LoopTraversal.cpp -llvm/lib/CodeGen/LowLevelType.cpp -llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp -llvm/lib/CodeGen/MachineCheckDebugify.cpp -llvm/lib/CodeGen/MachineCycleAnalysis.cpp -llvm/lib/CodeGen/MachineDebugify.cpp -llvm/lib/CodeGen/MachineFunctionPass.cpp -llvm/lib/CodeGen/MachineFunctionSplitter.cpp -llvm/lib/CodeGen/MachineModuleInfoImpls.cpp -llvm/lib/CodeGen/MachineModuleSlotTracker.cpp -llvm/lib/CodeGen/MachineOutliner.cpp -llvm/lib/CodeGen/MachinePassManager.cpp -llvm/lib/CodeGen/MachineSSAContext.cpp -llvm/lib/CodeGen/MachineStableHash.cpp -llvm/lib/CodeGen/MachineStripDebug.cpp -llvm/lib/CodeGen/MIRFSDiscriminator.cpp -llvm/lib/CodeGen/MIRNamerPass.cpp -llvm/lib/CodeGen/MIRPrintingPass.cpp -llvm/lib/CodeGen/MIRSampleProfile.cpp -llvm/lib/CodeGen/MIRVRegNamerUtils.cpp -llvm/lib/CodeGen/MIRYamlMapping.cpp -llvm/lib/CodeGen/MLRegAllocEvictAdvisor.cpp -llvm/lib/CodeGen/MultiHazardRecognizer.cpp -llvm/lib/CodeGen/NonRelocatableStringpool.cpp -llvm/lib/CodeGen/ParallelCG.cpp -llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp -llvm/lib/CodeGen/PseudoProbeInserter.cpp -llvm/lib/CodeGen/RegAllocBase.cpp -llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp -llvm/lib/CodeGen/RegAllocEvictionAdvisor.h -llvm/lib/CodeGen/RegAllocGreedy.h -llvm/lib/CodeGen/RegAllocScore.cpp -llvm/lib/CodeGen/RegAllocScore.h -llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp -llvm/lib/CodeGen/ReplaceWithVeclib.cpp -llvm/lib/CodeGen/SafeStackLayout.cpp -llvm/lib/CodeGen/SafeStackLayout.h -llvm/lib/CodeGen/SpillPlacement.h -llvm/lib/CodeGen/TargetOptionsImpl.cpp -llvm/lib/CodeGen/VLIWMachineScheduler.cpp -llvm/lib/CodeGen/WasmEHPrepare.cpp -llvm/lib/CodeGen/XRayInstrumentation.cpp -llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp -llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp -llvm/lib/CodeGen/AsmPrinter/AIXException.cpp -llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp -llvm/lib/CodeGen/AsmPrinter/DwarfException.h -llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h -llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp -llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp -llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h -llvm/lib/CodeGen/AsmPrinter/EHStreamer.h -llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp -llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp -llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.h -llvm/lib/CodeGen/AsmPrinter/WasmException.cpp -llvm/lib/CodeGen/AsmPrinter/WasmException.h -llvm/lib/CodeGen/AsmPrinter/WinCFGuard.cpp -llvm/lib/CodeGen/AsmPrinter/WinCFGuard.h -llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp -llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp -llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp -llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp -llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp -llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp -llvm/lib/CodeGen/GlobalISel/LegalizeMutations.cpp -llvm/lib/CodeGen/GlobalISel/Localizer.cpp -llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp -llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp -llvm/lib/CodeGen/MIRParser/MILexer.h -llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp -llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h -llvm/lib/DebugInfo/CodeView/CodeViewError.cpp -llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp -llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp -llvm/lib/DebugInfo/CodeView/DebugCrossExSubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugFrameDataSubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugLinesSubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugSubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp -llvm/lib/DebugInfo/CodeView/DebugSubsectionVisitor.cpp -llvm/lib/DebugInfo/CodeView/DebugSymbolRVASubsection.cpp -llvm/lib/DebugInfo/CodeView/DebugSymbolsSubsection.cpp -llvm/lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp -llvm/lib/DebugInfo/CodeView/Line.cpp -llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp -llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp -llvm/lib/DebugInfo/CodeView/SimpleTypeSerializer.cpp -llvm/lib/DebugInfo/CodeView/StringsAndChecksums.cpp -llvm/lib/DebugInfo/CodeView/SymbolRecordHelpers.cpp -llvm/lib/DebugInfo/CodeView/SymbolSerializer.cpp -llvm/lib/DebugInfo/CodeView/TypeHashing.cpp -llvm/lib/DebugInfo/CodeView/TypeIndex.cpp -llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp -llvm/lib/DebugInfo/DWARF/DWARFAddressRange.cpp -llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp -llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp -llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp -llvm/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp -llvm/lib/DebugInfo/DWARF/DWARFDebugRnglists.cpp -llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp -llvm/lib/DebugInfo/DWARF/DWARFLocationExpression.cpp -llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp -llvm/lib/DebugInfo/GSYM/GsymCreator.cpp -llvm/lib/DebugInfo/GSYM/LookupResult.cpp -llvm/lib/DebugInfo/MSF/MSFBuilder.cpp -llvm/lib/DebugInfo/MSF/MSFCommon.cpp -llvm/lib/DebugInfo/MSF/MSFError.cpp -llvm/lib/DebugInfo/PDB/GenericError.cpp -llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp -llvm/lib/DebugInfo/PDB/PDB.cpp -llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp -llvm/lib/DebugInfo/PDB/PDBSymbol.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolData.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolExe.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugEnd.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolFuncDebugStart.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolLabel.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolPublicSymbol.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolThunk.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeArray.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeBaseClass.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeBuiltin.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeCustom.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeDimension.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeEnum.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeFriend.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeFunctionArg.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeManaged.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypePointer.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeTypedef.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeUDT.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTable.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolTypeVTableShape.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolUnknown.cpp -llvm/lib/DebugInfo/PDB/PDBSymbolUsingNamespace.cpp -llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp -llvm/lib/DebugInfo/PDB/DIA/DIADataStream.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumDebugStreams.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumFrameData.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumInjectedSources.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumLineNumbers.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumSectionContribs.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumSourceFiles.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAEnumTables.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAFrameData.cpp -llvm/lib/DebugInfo/PDB/DIA/DIAInjectedSource.cpp -llvm/lib/DebugInfo/PDB/DIA/DIALineNumber.cpp -llvm/lib/DebugInfo/PDB/DIA/DIARawSymbol.cpp -llvm/lib/DebugInfo/PDB/DIA/DIASourceFile.cpp -llvm/lib/DebugInfo/PDB/DIA/DIATable.cpp -llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptor.cpp -llvm/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp -llvm/lib/DebugInfo/PDB/Native/GlobalsStream.cpp -llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp -llvm/lib/DebugInfo/PDB/Native/Hash.cpp -llvm/lib/DebugInfo/PDB/Native/HashTable.cpp -llvm/lib/DebugInfo/PDB/Native/InfoStream.cpp -llvm/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp -llvm/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp -llvm/lib/DebugInfo/PDB/Native/NativeCompilandSymbol.cpp -llvm/lib/DebugInfo/PDB/Native/NativeEnumGlobals.cpp -llvm/lib/DebugInfo/PDB/Native/NativeEnumLineNumbers.cpp -llvm/lib/DebugInfo/PDB/Native/NativeEnumSymbols.cpp -llvm/lib/DebugInfo/PDB/Native/NativeEnumTypes.cpp -llvm/lib/DebugInfo/PDB/Native/NativeExeSymbol.cpp -llvm/lib/DebugInfo/PDB/Native/NativeFunctionSymbol.cpp -llvm/lib/DebugInfo/PDB/Native/NativeInlineSiteSymbol.cpp -llvm/lib/DebugInfo/PDB/Native/NativeLineNumber.cpp -llvm/lib/DebugInfo/PDB/Native/NativePublicSymbol.cpp -llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp -llvm/lib/DebugInfo/PDB/Native/NativeSymbolEnumerator.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeArray.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeBuiltin.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeEnum.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypePointer.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeTypedef.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeUDT.cpp -llvm/lib/DebugInfo/PDB/Native/NativeTypeVTShape.cpp -llvm/lib/DebugInfo/PDB/Native/PDBFile.cpp -llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp -llvm/lib/DebugInfo/PDB/Native/PDBStringTable.cpp -llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp -llvm/lib/DebugInfo/PDB/Native/PublicsStream.cpp -llvm/lib/DebugInfo/PDB/Native/RawError.cpp -llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp -llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp -llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp -llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp -llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp -llvm/lib/DebugInfo/Symbolize/Symbolize.cpp -llvm/lib/Debuginfod/Debuginfod.cpp -llvm/lib/Debuginfod/DIFetcher.cpp -llvm/lib/Debuginfod/HTTPClient.cpp -llvm/lib/Demangle/Demangle.cpp -llvm/lib/Demangle/DLangDemangle.cpp -llvm/lib/Demangle/MicrosoftDemangleNodes.cpp -llvm/lib/Demangle/RustDemangle.cpp -llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp -llvm/lib/DWARFLinker/DWARFLinkerDeclContext.cpp -llvm/lib/DWARFLinker/DWARFStreamer.cpp -llvm/lib/DWP/DWP.cpp -llvm/lib/DWP/DWPError.cpp -llvm/lib/ExecutionEngine/SectionMemoryManager.cpp -llvm/lib/ExecutionEngine/JITLink/aarch64.cpp -llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h -llvm/lib/ExecutionEngine/JITLink/ELF.cpp -llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp -llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp -llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp -llvm/lib/ExecutionEngine/JITLink/JITLink.cpp -llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp -llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp -llvm/lib/ExecutionEngine/JITLink/MachO.cpp -llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp -llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp -llvm/lib/ExecutionEngine/JITLink/MemoryFlags.cpp -llvm/lib/ExecutionEngine/JITLink/riscv.cpp -llvm/lib/ExecutionEngine/JITLink/x86_64.cpp -llvm/lib/ExecutionEngine/Orc/CompileUtils.cpp -llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp -llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp -llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp -llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp -llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp -llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp -llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp -llvm/lib/ExecutionEngine/Orc/EPCGenericDylibManager.cpp -llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp -llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp -llvm/lib/ExecutionEngine/Orc/IRCompileLayer.cpp -llvm/lib/ExecutionEngine/Orc/IRTransformLayer.cpp -llvm/lib/ExecutionEngine/Orc/Layer.cpp -llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp -llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp -llvm/lib/ExecutionEngine/Orc/Mangling.cpp -llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp -llvm/lib/ExecutionEngine/Orc/ObjectTransformLayer.cpp -llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp -llvm/lib/ExecutionEngine/Orc/Speculation.cpp -llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp -llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp -llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp -llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp -llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/OrcRTBootstrap.h -llvm/lib/ExecutionEngine/Orc/TargetProcess/RegisterEHFrames.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorDylibManager.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp -llvm/lib/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.cpp -llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h -llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h -llvm/lib/FileCheck/FileCheckImpl.h -llvm/lib/Frontend/OpenACC/ACC.cpp -llvm/lib/Frontend/OpenMP/OMP.cpp -llvm/lib/Frontend/OpenMP/OMPContext.cpp -llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp -llvm/lib/FuzzMutate/OpDescriptor.cpp -llvm/lib/FuzzMutate/RandomIRBuilder.cpp -llvm/lib/InterfaceStub/ELFObjHandler.cpp -llvm/lib/InterfaceStub/IFSHandler.cpp -llvm/lib/InterfaceStub/IFSStub.cpp -llvm/lib/IR/Assumptions.cpp -llvm/lib/IR/Comdat.cpp -llvm/lib/IR/DebugInfoMetadata.cpp -llvm/lib/IR/DebugLoc.cpp -llvm/lib/IR/DIBuilder.cpp -llvm/lib/IR/FPEnv.cpp -llvm/lib/IR/GCStrategy.cpp -llvm/lib/IR/GVMaterializer.cpp -llvm/lib/IR/LLVMContextImpl.h -llvm/lib/IR/MetadataImpl.h -llvm/lib/IR/OptBisect.cpp -llvm/lib/IR/PassInstrumentation.cpp -llvm/lib/IR/PassManager.cpp -llvm/lib/IR/PrintPasses.cpp -llvm/lib/IR/PseudoProbe.cpp -llvm/lib/IR/ReplaceConstant.cpp -llvm/lib/IR/SSAContext.cpp -llvm/lib/IR/Statepoint.cpp -llvm/lib/IR/StructuralHash.cpp -llvm/lib/IR/ValueSymbolTable.cpp -llvm/lib/MC/MCAsmInfoCOFF.cpp -llvm/lib/MC/MCAsmInfoELF.cpp -llvm/lib/MC/MCAsmInfoGOFF.cpp -llvm/lib/MC/MCAsmInfoWasm.cpp -llvm/lib/MC/MCAsmInfoXCOFF.cpp -llvm/lib/MC/MCAsmMacro.cpp -llvm/lib/MC/MCCodeEmitter.cpp -llvm/lib/MC/MCInstrAnalysis.cpp -llvm/lib/MC/MCInstrDesc.cpp -llvm/lib/MC/MCInstrInfo.cpp -llvm/lib/MC/MCLinkerOptimizationHint.cpp -llvm/lib/MC/MCMachObjectTargetWriter.cpp -llvm/lib/MC/MCObjectWriter.cpp -llvm/lib/MC/MCPseudoProbe.cpp -llvm/lib/MC/MCSectionWasm.cpp -llvm/lib/MC/MCSymbolXCOFF.cpp -llvm/lib/MC/MCWasmObjectTargetWriter.cpp -llvm/lib/MC/MCWasmStreamer.cpp -llvm/lib/MC/MCXCOFFObjectTargetWriter.cpp -llvm/lib/MC/MCXCOFFStreamer.cpp -llvm/lib/MC/StringTableBuilder.cpp -llvm/lib/MC/MCDisassembler/Disassembler.h -llvm/lib/MC/MCDisassembler/MCRelocationInfo.cpp -llvm/lib/MC/MCDisassembler/MCSymbolizer.cpp -llvm/lib/MC/MCParser/GOFFAsmParser.cpp -llvm/lib/MC/MCParser/MCAsmParserExtension.cpp -llvm/lib/MC/MCParser/XCOFFAsmParser.cpp -llvm/lib/MCA/CodeEmitter.cpp -llvm/lib/MCA/Context.cpp -llvm/lib/MCA/CustomBehaviour.cpp -llvm/lib/MCA/HWEventListener.cpp -llvm/lib/MCA/InstrBuilder.cpp -llvm/lib/MCA/Instruction.cpp -llvm/lib/MCA/Pipeline.cpp -llvm/lib/MCA/View.cpp -llvm/lib/MCA/HardwareUnits/HardwareUnit.cpp -llvm/lib/MCA/HardwareUnits/RegisterFile.cpp -llvm/lib/MCA/HardwareUnits/ResourceManager.cpp -llvm/lib/MCA/HardwareUnits/Scheduler.cpp -llvm/lib/MCA/Stages/DispatchStage.cpp -llvm/lib/MCA/Stages/EntryStage.cpp -llvm/lib/MCA/Stages/ExecuteStage.cpp -llvm/lib/MCA/Stages/InOrderIssueStage.cpp -llvm/lib/MCA/Stages/MicroOpQueueStage.cpp -llvm/lib/MCA/Stages/RetireStage.cpp -llvm/lib/MCA/Stages/Stage.cpp -llvm/lib/ObjCopy/Archive.cpp -llvm/lib/ObjCopy/Archive.h -llvm/lib/ObjCopy/ConfigManager.cpp -llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp -llvm/lib/ObjCopy/COFF/COFFObject.cpp -llvm/lib/ObjCopy/COFF/COFFObject.h -llvm/lib/ObjCopy/COFF/COFFReader.cpp -llvm/lib/ObjCopy/COFF/COFFReader.h -llvm/lib/ObjCopy/COFF/COFFWriter.cpp -llvm/lib/ObjCopy/COFF/COFFWriter.h -llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp -llvm/lib/ObjCopy/ELF/ELFObject.cpp -llvm/lib/ObjCopy/ELF/ELFObject.h -llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp -llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h -llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp -llvm/lib/ObjCopy/MachO/MachOObject.cpp -llvm/lib/ObjCopy/MachO/MachOObject.h -llvm/lib/ObjCopy/MachO/MachOReader.cpp -llvm/lib/ObjCopy/MachO/MachOReader.h -llvm/lib/ObjCopy/MachO/MachOWriter.cpp -llvm/lib/ObjCopy/MachO/MachOWriter.h -llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp -llvm/lib/ObjCopy/wasm/WasmObject.cpp -llvm/lib/ObjCopy/wasm/WasmObject.h -llvm/lib/ObjCopy/wasm/WasmReader.cpp -llvm/lib/ObjCopy/wasm/WasmReader.h -llvm/lib/ObjCopy/wasm/WasmWriter.cpp -llvm/lib/ObjCopy/wasm/WasmWriter.h -llvm/lib/ObjCopy/XCOFF/XCOFFObject.h -llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp -llvm/lib/ObjCopy/XCOFF/XCOFFReader.h -llvm/lib/Object/Archive.cpp -llvm/lib/Object/Binary.cpp -llvm/lib/Object/Decompressor.cpp -llvm/lib/Object/FaultMapParser.cpp -llvm/lib/Object/IRObjectFile.cpp -llvm/lib/Object/IRSymtab.cpp -llvm/lib/Object/MachOUniversalWriter.cpp -llvm/lib/Object/Minidump.cpp -llvm/lib/Object/ModuleSymbolTable.cpp -llvm/lib/Object/ObjectFile.cpp -llvm/lib/Object/RecordStreamer.cpp -llvm/lib/Object/SymbolicFile.cpp -llvm/lib/Object/SymbolSize.cpp -llvm/lib/Object/TapiFile.cpp -llvm/lib/Object/TapiUniversal.cpp -llvm/lib/Object/WindowsMachineFlag.cpp -llvm/lib/ObjectYAML/ArchiveEmitter.cpp -llvm/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp -llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp -llvm/lib/ObjectYAML/CodeViewYAMLTypeHashing.cpp -llvm/lib/ObjectYAML/DWARFEmitter.cpp -llvm/lib/ObjectYAML/MachOEmitter.cpp -llvm/lib/ObjectYAML/ObjectYAML.cpp -llvm/lib/ObjectYAML/WasmYAML.cpp -llvm/lib/ObjectYAML/yaml2obj.cpp -llvm/lib/Passes/OptimizationLevel.cpp -llvm/lib/Passes/PassBuilderBindings.cpp -llvm/lib/Passes/PassPlugin.cpp -llvm/lib/ProfileData/GCOV.cpp -llvm/lib/ProfileData/InstrProfCorrelator.cpp -llvm/lib/ProfileData/MemProf.cpp -llvm/lib/ProfileData/SampleProfWriter.cpp -llvm/lib/Remarks/BitstreamRemarkParser.h -llvm/lib/Remarks/BitstreamRemarkSerializer.cpp -llvm/lib/Remarks/Remark.cpp -llvm/lib/Remarks/RemarkLinker.cpp -llvm/lib/Remarks/RemarkParser.cpp -llvm/lib/Remarks/RemarkSerializer.cpp -llvm/lib/Remarks/RemarkStreamer.cpp -llvm/lib/Remarks/RemarkStringTable.cpp -llvm/lib/Remarks/YAMLRemarkParser.h -llvm/lib/Remarks/YAMLRemarkSerializer.cpp -llvm/lib/Support/ABIBreak.cpp -llvm/lib/Support/ARMBuildAttrs.cpp -llvm/lib/Support/AutoConvert.cpp -llvm/lib/Support/BinaryStreamError.cpp -llvm/lib/Support/BinaryStreamReader.cpp -llvm/lib/Support/BinaryStreamRef.cpp -llvm/lib/Support/BinaryStreamWriter.cpp -llvm/lib/Support/BlockFrequency.cpp -llvm/lib/Support/BranchProbability.cpp -llvm/lib/Support/BuryPointer.cpp -llvm/lib/Support/Caching.cpp -llvm/lib/Support/CodeGenCoverage.cpp -llvm/lib/Support/COM.cpp -llvm/lib/Support/Compression.cpp -llvm/lib/Support/CRC.cpp -llvm/lib/Support/CSKYAttributeParser.cpp -llvm/lib/Support/CSKYAttributes.cpp -llvm/lib/Support/CSKYTargetParser.cpp -llvm/lib/Support/DebugOptions.h -llvm/lib/Support/DivisionByConstantInfo.cpp -llvm/lib/Support/DJB.cpp -llvm/lib/Support/ELFAttributeParser.cpp -llvm/lib/Support/ELFAttributes.cpp -llvm/lib/Support/ExtensibleRTTI.cpp -llvm/lib/Support/FormattedStream.cpp -llvm/lib/Support/GlobPattern.cpp -llvm/lib/Support/Hashing.cpp -llvm/lib/Support/InitLLVM.cpp -llvm/lib/Support/InstructionCost.cpp -llvm/lib/Support/IntEqClasses.cpp -llvm/lib/Support/LineIterator.cpp -llvm/lib/Support/LowLevelType.cpp -llvm/lib/Support/MemAlloc.cpp -llvm/lib/Support/Memory.cpp -llvm/lib/Support/MemoryBufferRef.cpp -llvm/lib/Support/MSP430AttributeParser.cpp -llvm/lib/Support/MSP430Attributes.cpp -llvm/lib/Support/Optional.cpp -llvm/lib/Support/Parallel.cpp -llvm/lib/Support/Program.cpp -llvm/lib/Support/RISCVAttributeParser.cpp -llvm/lib/Support/RISCVAttributes.cpp -llvm/lib/Support/ScopedPrinter.cpp -llvm/lib/Support/SHA1.cpp -llvm/lib/Support/SHA256.cpp -llvm/lib/Support/Signposts.cpp -llvm/lib/Support/SourceMgr.cpp -llvm/lib/Support/StringExtras.cpp -llvm/lib/Support/StringMap.cpp -llvm/lib/Support/StringSaver.cpp -llvm/lib/Support/SuffixTree.cpp -llvm/lib/Support/SystemUtils.cpp -llvm/lib/Support/TarWriter.cpp -llvm/lib/Support/ThreadPool.cpp -llvm/lib/Support/TimeProfiler.cpp -llvm/lib/Support/ToolOutputFile.cpp -llvm/lib/Support/TypeSize.cpp -llvm/lib/Support/UnicodeCaseFold.cpp -llvm/lib/Support/VersionTuple.cpp -llvm/lib/Support/Watchdog.cpp -llvm/lib/Support/WithColor.cpp -llvm/lib/TableGen/Parser.cpp -llvm/lib/TableGen/RecordContext.h -llvm/lib/TableGen/TableGenBackendSkeleton.cpp -llvm/lib/Target/TargetIntrinsicInfo.cpp -llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp -llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp -llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp -llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h -llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp -llvm/lib/Target/AArch64/AArch64StackTagging.cpp -llvm/lib/Target/AArch64/AArch64TargetObjectFile.h -llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h -llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp -llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.h -llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp -llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp -llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h -llvm/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h -llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h -llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp -llvm/lib/Target/AArch64/MCTargetDesc/AArch64WinCOFFObjectWriter.cpp -llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.h -llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp -llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp -llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp -llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h -llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp -llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp -llvm/lib/Target/AMDGPU/AMDGPUExportClustering.h -llvm/lib/Target/AMDGPU/AMDGPUFixFunctionBitcasts.cpp -llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp -llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h -llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp -llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp -llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h -llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp -llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp -llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h -llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp -llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h -llvm/lib/Target/AMDGPU/AMDGPUPTNote.h -llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp -llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h -llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp -llvm/lib/Target/AMDGPU/GCNPreRAOptimizations.cpp -llvm/lib/Target/AMDGPU/R600.h -llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp -llvm/lib/Target/AMDGPU/R600MCInstLower.cpp -llvm/lib/Target/AMDGPU/R600RegisterInfo.h -llvm/lib/Target/AMDGPU/R600Subtarget.cpp -llvm/lib/Target/AMDGPU/R600TargetMachine.cpp -llvm/lib/Target/AMDGPU/R600TargetMachine.h -llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp -llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h -llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp -llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp -llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp -llvm/lib/Target/AMDGPU/SIModeRegister.cpp -llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp -llvm/lib/Target/AMDGPU/SIProgramInfo.cpp -llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp -llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h -llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h -llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h -llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp -llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h -llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp -llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp -llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp -llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h -llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h -llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h -llvm/lib/Target/ARC/ARC.h -llvm/lib/Target/ARC/ARCAsmPrinter.cpp -llvm/lib/Target/ARC/ARCBranchFinalize.cpp -llvm/lib/Target/ARC/ARCExpandPseudos.cpp -llvm/lib/Target/ARC/ARCFrameLowering.h -llvm/lib/Target/ARC/ARCISelDAGToDAG.cpp -llvm/lib/Target/ARC/ARCISelLowering.cpp -llvm/lib/Target/ARC/ARCISelLowering.h -llvm/lib/Target/ARC/ARCMachineFunctionInfo.cpp -llvm/lib/Target/ARC/ARCMachineFunctionInfo.h -llvm/lib/Target/ARC/ARCMCInstLower.cpp -llvm/lib/Target/ARC/ARCMCInstLower.h -llvm/lib/Target/ARC/ARCRegisterInfo.cpp -llvm/lib/Target/ARC/ARCRegisterInfo.h -llvm/lib/Target/ARC/ARCSubtarget.cpp -llvm/lib/Target/ARC/ARCSubtarget.h -llvm/lib/Target/ARC/ARCTargetMachine.h -llvm/lib/Target/ARC/ARCTargetStreamer.h -llvm/lib/Target/ARC/ARCTargetTransformInfo.h -llvm/lib/Target/ARC/MCTargetDesc/ARCInfo.h -llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h -llvm/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.cpp -llvm/lib/Target/ARC/MCTargetDesc/ARCMCAsmInfo.h -llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.cpp -llvm/lib/Target/ARC/MCTargetDesc/ARCMCTargetDesc.h -llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.cpp -llvm/lib/Target/ARC/TargetInfo/ARCTargetInfo.h -llvm/lib/Target/ARM/ARMBlockPlacement.cpp -llvm/lib/Target/ARM/ARMBranchTargets.cpp -llvm/lib/Target/ARM/ARMCallingConv.h -llvm/lib/Target/ARM/ARMHazardRecognizer.h -llvm/lib/Target/ARM/ARMInstrInfo.cpp -llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp -llvm/lib/Target/ARM/ARMTargetMachine.h -llvm/lib/Target/ARM/ARMTargetObjectFile.h -llvm/lib/Target/ARM/MVETailPredUtils.h -llvm/lib/Target/ARM/MVEVPTBlockPass.cpp -llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h -llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h -llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp -llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.h -llvm/lib/Target/AVR/AVR.h -llvm/lib/Target/AVR/AVRAsmPrinter.cpp -llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp -llvm/lib/Target/AVR/AVRFrameLowering.h -llvm/lib/Target/AVR/AVRInstrInfo.cpp -llvm/lib/Target/AVR/AVRInstrInfo.h -llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp -llvm/lib/Target/AVR/AVRISelLowering.cpp -llvm/lib/Target/AVR/AVRISelLowering.h -llvm/lib/Target/AVR/AVRMachineFunctionInfo.h -llvm/lib/Target/AVR/AVRMCInstLower.cpp -llvm/lib/Target/AVR/AVRMCInstLower.h -llvm/lib/Target/AVR/AVRRegisterInfo.cpp -llvm/lib/Target/AVR/AVRRegisterInfo.h -llvm/lib/Target/AVR/AVRSelectionDAGInfo.h -llvm/lib/Target/AVR/AVRShiftExpand.cpp -llvm/lib/Target/AVR/AVRSubtarget.cpp -llvm/lib/Target/AVR/AVRSubtarget.h -llvm/lib/Target/AVR/AVRTargetMachine.cpp -llvm/lib/Target/AVR/AVRTargetMachine.h -llvm/lib/Target/AVR/AVRTargetObjectFile.cpp -llvm/lib/Target/AVR/AVRTargetObjectFile.h -llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp -llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h -llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.h -llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h -llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h -llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp -llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.h -llvm/lib/Target/BPF/BPFAdjustOpt.cpp -llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp -llvm/lib/Target/BPF/BPFCORE.h -llvm/lib/Target/BPF/BPFFrameLowering.cpp -llvm/lib/Target/BPF/BPFIRPeephole.cpp -llvm/lib/Target/BPF/BPFMCInstLower.cpp -llvm/lib/Target/BPF/BPFPreserveDIType.cpp -llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp -llvm/lib/Target/BPF/BPFSubtarget.cpp -llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp -llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.h -llvm/lib/Target/CSKY/CSKY.h -llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp -llvm/lib/Target/CSKY/CSKYAsmPrinter.h -llvm/lib/Target/CSKY/CSKYCallingConv.h -llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp -llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp -llvm/lib/Target/CSKY/CSKYConstantPoolValue.h -llvm/lib/Target/CSKY/CSKYFrameLowering.cpp -llvm/lib/Target/CSKY/CSKYFrameLowering.h -llvm/lib/Target/CSKY/CSKYInstrInfo.cpp -llvm/lib/Target/CSKY/CSKYInstrInfo.h -llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp -llvm/lib/Target/CSKY/CSKYISelLowering.cpp -llvm/lib/Target/CSKY/CSKYISelLowering.h -llvm/lib/Target/CSKY/CSKYMachineFunctionInfo.h -llvm/lib/Target/CSKY/CSKYMCInstLower.cpp -llvm/lib/Target/CSKY/CSKYMCInstLower.h -llvm/lib/Target/CSKY/CSKYRegisterInfo.cpp -llvm/lib/Target/CSKY/CSKYRegisterInfo.h -llvm/lib/Target/CSKY/CSKYSubtarget.cpp -llvm/lib/Target/CSKY/CSKYSubtarget.h -llvm/lib/Target/CSKY/CSKYTargetMachine.cpp -llvm/lib/Target/CSKY/CSKYTargetMachine.h -llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp -llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYFixupKinds.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCAsmInfo.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp -llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp -llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h -llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h -llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp -llvm/lib/Target/Hexagon/HexagonMachineScheduler.h -llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp -llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp -llvm/lib/Target/Hexagon/HexagonVectorLoopCarriedReuse.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonFixupKinds.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h -llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCShuffler.h -llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.h -llvm/lib/Target/Lanai/Lanai.h -llvm/lib/Target/Lanai/LanaiAluCode.h -llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp -llvm/lib/Target/Lanai/LanaiCondCode.h -llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp -llvm/lib/Target/Lanai/LanaiFrameLowering.cpp -llvm/lib/Target/Lanai/LanaiFrameLowering.h -llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp -llvm/lib/Target/Lanai/LanaiISelLowering.h -llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.cpp -llvm/lib/Target/Lanai/LanaiMachineFunctionInfo.h -llvm/lib/Target/Lanai/LanaiMCInstLower.h -llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp -llvm/lib/Target/Lanai/LanaiRegisterInfo.h -llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.cpp -llvm/lib/Target/Lanai/LanaiSelectionDAGInfo.h -llvm/lib/Target/Lanai/LanaiSubtarget.cpp -llvm/lib/Target/Lanai/LanaiSubtarget.h -llvm/lib/Target/Lanai/LanaiTargetMachine.cpp -llvm/lib/Target/Lanai/LanaiTargetObjectFile.h -llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp -llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiBaseInfo.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiELFObjectWriter.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiFixupKinds.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCExpr.h -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp -llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h -llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp -llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h -llvm/lib/Target/LoongArch/LoongArch.h -llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp -llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h -llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp -llvm/lib/Target/LoongArch/LoongArchFrameLowering.h -llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp -llvm/lib/Target/LoongArch/LoongArchInstrInfo.h -llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp -llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h -llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp -llvm/lib/Target/LoongArch/LoongArchISelLowering.h -llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h -llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp -llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp -llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h -llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp -llvm/lib/Target/LoongArch/LoongArchSubtarget.h -llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp -llvm/lib/Target/LoongArch/LoongArchTargetMachine.h -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp -llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h -llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp -llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h -llvm/lib/Target/M68k/M68k.h -llvm/lib/Target/M68k/M68kAsmPrinter.cpp -llvm/lib/Target/M68k/M68kAsmPrinter.h -llvm/lib/Target/M68k/M68kCallingConv.h -llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp -llvm/lib/Target/M68k/M68kExpandPseudo.cpp -llvm/lib/Target/M68k/M68kFrameLowering.cpp -llvm/lib/Target/M68k/M68kFrameLowering.h -llvm/lib/Target/M68k/M68kInstrBuilder.h -llvm/lib/Target/M68k/M68kInstrInfo.cpp -llvm/lib/Target/M68k/M68kInstrInfo.h -llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp -llvm/lib/Target/M68k/M68kISelLowering.h -llvm/lib/Target/M68k/M68kMachineFunction.cpp -llvm/lib/Target/M68k/M68kMachineFunction.h -llvm/lib/Target/M68k/M68kMCInstLower.cpp -llvm/lib/Target/M68k/M68kMCInstLower.h -llvm/lib/Target/M68k/M68kRegisterInfo.cpp -llvm/lib/Target/M68k/M68kRegisterInfo.h -llvm/lib/Target/M68k/M68kSubtarget.cpp -llvm/lib/Target/M68k/M68kSubtarget.h -llvm/lib/Target/M68k/M68kTargetMachine.cpp -llvm/lib/Target/M68k/M68kTargetMachine.h -llvm/lib/Target/M68k/M68kTargetObjectFile.cpp -llvm/lib/Target/M68k/M68kTargetObjectFile.h -llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp -llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp -llvm/lib/Target/M68k/GISel/M68kCallLowering.h -llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp -llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.cpp -llvm/lib/Target/M68k/GISel/M68kLegalizerInfo.h -llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.cpp -llvm/lib/Target/M68k/GISel/M68kRegisterBankInfo.h -llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h -llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kFixupKinds.h -llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h -llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.h -llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h -llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp -llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp -llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.h -llvm/lib/Target/Mips/Mips16RegisterInfo.h -llvm/lib/Target/Mips/MipsCallLowering.h -llvm/lib/Target/Mips/MipsLegalizerInfo.h -llvm/lib/Target/Mips/MipsMCInstLower.h -llvm/lib/Target/Mips/MipsMulMulBugPass.cpp -llvm/lib/Target/Mips/MipsOptionRecord.h -llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp -llvm/lib/Target/Mips/MipsRegisterBankInfo.h -llvm/lib/Target/Mips/MipsSEFrameLowering.h -llvm/lib/Target/Mips/MipsSERegisterInfo.h -llvm/lib/Target/Mips/MipsTargetMachine.cpp -llvm/lib/Target/Mips/MipsTargetMachine.h -llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp -llvm/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h -llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp -llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h -llvm/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h -llvm/lib/Target/Mips/MCTargetDesc/MipsOptionRecord.cpp -llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp -llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.h -llvm/lib/Target/MSP430/MCTargetDesc/MSP430FixupKinds.h -llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h -llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp -llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp -llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.h -llvm/lib/Target/NVPTX/ManagedStringPool.h -llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp -llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.h -llvm/lib/Target/NVPTX/NVPTXAtomicLower.cpp -llvm/lib/Target/NVPTX/NVPTXAtomicLower.h -llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp -llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp -llvm/lib/Target/NVPTX/NVPTXISelLowering.h -llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp -llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h -llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXTargetStreamer.h -llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp -llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h -llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp -llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp -llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp -llvm/lib/Target/PowerPC/PPCTargetMachine.h -llvm/lib/Target/PowerPC/PPCTargetStreamer.h -llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp -llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h -llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp -llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp -llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.h -llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp -llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h -llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h -llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp -llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp -llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp -llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.h -llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp -llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h -llvm/lib/Target/RISCV/RISCVCallLowering.cpp -llvm/lib/Target/RISCV/RISCVCallLowering.h -llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp -llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp -llvm/lib/Target/RISCV/RISCVLegalizerInfo.cpp -llvm/lib/Target/RISCV/RISCVLegalizerInfo.h -llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h -llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp -llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp -llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp -llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h -llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp -llvm/lib/Target/RISCV/RISCVTargetMachine.h -llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp -llvm/lib/Target/RISCV/RISCVTargetObjectFile.h -llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp -llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp -llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h -llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp -llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp -llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h -llvm/lib/Target/Sparc/LeonPasses.h -llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp -llvm/lib/Target/Sparc/SparcTargetObjectFile.h -llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h -llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp -llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.cpp -llvm/lib/Target/Sparc/MCTargetDesc/SparcTargetStreamer.h -llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp -llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.h -llvm/lib/Target/SystemZ/SystemZAsmPrinter.h -llvm/lib/Target/SystemZ/SystemZInstrBuilder.h -llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h -llvm/lib/Target/SystemZ/SystemZSubtarget.cpp -llvm/lib/Target/SystemZ/SystemZTargetStreamer.h -llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h -llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp -llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h -llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h -llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h -llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp -llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h -llvm/lib/Target/VE/LVLGen.cpp -llvm/lib/Target/VE/VEAsmPrinter.cpp -llvm/lib/Target/VE/VECustomDAG.cpp -llvm/lib/Target/VE/VECustomDAG.h -llvm/lib/Target/VE/VEFrameLowering.h -llvm/lib/Target/VE/VEInstrBuilder.h -llvm/lib/Target/VE/VEInstrInfo.h -llvm/lib/Target/VE/VEISelDAGToDAG.cpp -llvm/lib/Target/VE/VEMachineFunctionInfo.cpp -llvm/lib/Target/VE/VEMachineFunctionInfo.h -llvm/lib/Target/VE/VEMCInstLower.cpp -llvm/lib/Target/VE/VERegisterInfo.cpp -llvm/lib/Target/VE/VERegisterInfo.h -llvm/lib/Target/VE/VESubtarget.cpp -llvm/lib/Target/VE/VESubtarget.h -llvm/lib/Target/VE/VETargetMachine.cpp -llvm/lib/Target/VE/VETargetMachine.h -llvm/lib/Target/VE/VETargetTransformInfo.h -llvm/lib/Target/VE/VVPISelLowering.cpp -llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp -llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp -llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp -llvm/lib/Target/VE/MCTargetDesc/VEELFObjectWriter.cpp -llvm/lib/Target/VE/MCTargetDesc/VEFixupKinds.h -llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp -llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h -llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp -llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.h -llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp -llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.cpp -llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h -llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp -llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.h -llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.cpp -llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h -llvm/lib/Target/VE/TargetInfo/VETargetInfo.h -llvm/lib/Target/WebAssembly/WebAssembly.h -llvm/lib/Target/WebAssembly/WebAssemblyAddMissingPrototypes.cpp -llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp -llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h -llvm/lib/Target/WebAssembly/WebAssemblyCFGSort.cpp -llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp -llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp -llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h -llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.h -llvm/lib/Target/WebAssembly/WebAssemblyFixBrTableDefaults.cpp -llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp -llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h -llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp -llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h -llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h -llvm/lib/Target/WebAssembly/WebAssemblyLateEHPrepare.cpp -llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp -llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp -llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp -llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h -llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h -llvm/lib/Target/WebAssembly/WebAssemblyMemIntrinsicResults.cpp -llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp -llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp -llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp -llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp -llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp -llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp -llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h -llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp -llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp -llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp -llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.h -llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h -llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp -llvm/lib/Target/WebAssembly/WebAssemblySortRegion.h -llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp -llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h -llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp -llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h -llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp -llvm/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h -llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp -llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h -llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyFixupKinds.h -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp -llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp -llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp -llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.h -llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp -llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h -llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp -llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h -llvm/lib/Target/X86/X86CallLowering.cpp -llvm/lib/Target/X86/X86CallLowering.h -llvm/lib/Target/X86/X86FastTileConfig.cpp -llvm/lib/Target/X86/X86InsertPrefetch.cpp -llvm/lib/Target/X86/X86InsertWait.cpp -llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp -llvm/lib/Target/X86/X86InterleavedAccess.cpp -llvm/lib/Target/X86/X86LegalizerInfo.h -llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp -llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp -llvm/lib/Target/X86/X86LowerAMXType.cpp -llvm/lib/Target/X86/X86LowerTileCopy.cpp -llvm/lib/Target/X86/X86PreTileConfig.cpp -llvm/lib/Target/X86/X86RegisterBankInfo.h -llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp -llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp -llvm/lib/Target/X86/X86TargetMachine.h -llvm/lib/Target/X86/X86TileConfig.cpp -llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp -llvm/lib/Target/X86/MCA/X86CustomBehaviour.h -llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h -llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp -llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h -llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h -llvm/lib/Target/X86/MCTargetDesc/X86TargetStreamer.h -llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp -llvm/lib/Target/XCore/XCoreTargetMachine.h -llvm/lib/Target/XCore/XCoreTargetTransformInfo.h -llvm/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h -llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp -llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h -llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp -llvm/lib/Testing/Annotations/Annotations.cpp -llvm/lib/Testing/Support/Error.cpp -llvm/lib/Testing/Support/SupportHelpers.cpp -llvm/lib/TextAPI/ArchitectureSet.cpp -llvm/lib/TextAPI/InterfaceFile.cpp -llvm/lib/TextAPI/PackedVersion.cpp -llvm/lib/TextAPI/Platform.cpp -llvm/lib/TextAPI/Symbol.cpp -llvm/lib/TextAPI/Target.cpp -llvm/lib/TextAPI/TextAPIContext.h -llvm/lib/TextAPI/TextStub.cpp -llvm/lib/TextAPI/TextStubCommon.cpp -llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h -llvm/lib/Transforms/CFGuard/CFGuard.cpp -llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp -llvm/lib/Transforms/Instrumentation/CFGMST.h -llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp -llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp -llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp -llvm/lib/Transforms/Instrumentation/MemProfiler.cpp -llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp -llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h -llvm/lib/Transforms/IPO/Annotation2Metadata.cpp -llvm/lib/Transforms/IPO/Attributor.cpp -llvm/lib/Transforms/IPO/ElimAvailExtern.cpp -llvm/lib/Transforms/IPO/ModuleInliner.cpp -llvm/lib/Transforms/IPO/OpenMPOpt.cpp -llvm/lib/Transforms/IPO/SampleContextTracker.cpp -llvm/lib/Transforms/IPO/SampleProfileProbe.cpp -llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp -llvm/lib/Transforms/ObjCARC/BlotMapVector.h -llvm/lib/Transforms/ObjCARC/ObjCARCExpand.cpp -llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.h -llvm/lib/Transforms/ObjCARC/PtrState.h -llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp -llvm/lib/Transforms/Scalar/ConstraintElimination.cpp -llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp -llvm/lib/Transforms/Scalar/DivRemPairs.cpp -llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp -llvm/lib/Transforms/Scalar/GVNHoist.cpp -llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp -llvm/lib/Transforms/Scalar/IVUsersPrinter.cpp -llvm/lib/Transforms/Scalar/LoopAccessAnalysisPrinter.cpp -llvm/lib/Transforms/Scalar/LoopBoundSplit.cpp -llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp -llvm/lib/Transforms/Scalar/LoopPassManager.cpp -llvm/lib/Transforms/Scalar/LoopRotation.cpp -llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp -llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp -llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp -llvm/lib/Transforms/Utils/CallGraphUpdater.cpp -llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp -llvm/lib/Transforms/Utils/CanonicalizeFreezeInLoops.cpp -llvm/lib/Transforms/Utils/CodeLayout.cpp -llvm/lib/Transforms/Utils/CodeMoverUtils.cpp -llvm/lib/Transforms/Utils/EscapeEnumerator.cpp -llvm/lib/Transforms/Utils/FunctionComparator.cpp -llvm/lib/Transforms/Utils/FunctionImportUtils.cpp -llvm/lib/Transforms/Utils/GlobalStatus.cpp -llvm/lib/Transforms/Utils/HelloWorld.cpp -llvm/lib/Transforms/Utils/InjectTLIMappings.cpp -llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp -llvm/lib/Transforms/Utils/MatrixUtils.cpp -llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp -llvm/lib/Transforms/Utils/SampleProfileInference.cpp -llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp -llvm/lib/Transforms/Utils/SCCPSolver.cpp -llvm/lib/Transforms/Utils/SSAUpdaterBulk.cpp -llvm/lib/Transforms/Utils/Utils.cpp -llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h -llvm/lib/Transforms/Vectorize/Vectorize.cpp -llvm/lib/Transforms/Vectorize/VPlanDominatorTree.h -llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp -llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h -llvm/lib/Transforms/Vectorize/VPlanLoopInfo.h -llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp -llvm/lib/Transforms/Vectorize/VPlanPredicator.h -llvm/lib/Transforms/Vectorize/VPlanSLP.cpp -llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp -llvm/lib/Transforms/Vectorize/VPlanTransforms.h -llvm/lib/Transforms/Vectorize/VPlanValue.h -llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp -llvm/lib/WindowsDriver/MSVCPaths.cpp -llvm/lib/WindowsManifest/WindowsManifestMerger.cpp -llvm/lib/XRay/BlockIndexer.cpp -llvm/lib/XRay/BlockVerifier.cpp -llvm/lib/XRay/FDRRecordProducer.cpp -llvm/lib/XRay/FDRRecords.cpp -llvm/lib/XRay/FDRTraceExpander.cpp -llvm/lib/XRay/FileHeaderReader.cpp -llvm/lib/XRay/InstrumentationMap.cpp -llvm/lib/XRay/LogBuilderConsumer.cpp -llvm/lib/XRay/Profile.cpp -llvm/lib/XRay/RecordPrinter.cpp -llvm/lib/XRay/Trace.cpp -llvm/tools/bugpoint/Miscompilation.cpp -llvm/tools/dsymutil/BinaryHolder.cpp -llvm/tools/dsymutil/BinaryHolder.h -llvm/tools/dsymutil/CFBundle.cpp -llvm/tools/dsymutil/CFBundle.h -llvm/tools/dsymutil/DebugMap.cpp -llvm/tools/dsymutil/DebugMap.h -llvm/tools/dsymutil/dsymutil.cpp -llvm/tools/dsymutil/dsymutil.h -llvm/tools/dsymutil/DwarfLinkerForBinary.cpp -llvm/tools/dsymutil/LinkUtils.h -llvm/tools/dsymutil/MachODebugMapParser.cpp -llvm/tools/dsymutil/MachOUtils.h -llvm/tools/dsymutil/Reproducer.cpp -llvm/tools/dsymutil/Reproducer.h -llvm/tools/dsymutil/SymbolMap.cpp -llvm/tools/dsymutil/SymbolMap.h -llvm/tools/lli/ExecutionUtils.cpp -llvm/tools/lli/ExecutionUtils.h -llvm/tools/lli/ForwardingMemoryManager.h -llvm/tools/lli/ChildTarget/ChildTarget.cpp -llvm/tools/llvm-as-fuzzer/llvm-as-fuzzer.cpp -llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp -llvm/tools/llvm-cfi-verify/lib/GraphBuilder.h -llvm/tools/llvm-cov/CoverageExporter.h -llvm/tools/llvm-cov/CoverageExporterJson.h -llvm/tools/llvm-cov/CoverageExporterLcov.cpp -llvm/tools/llvm-cov/CoverageExporterLcov.h -llvm/tools/llvm-cov/CoverageFilters.cpp -llvm/tools/llvm-cov/CoverageSummaryInfo.cpp -llvm/tools/llvm-cov/CoverageSummaryInfo.h -llvm/tools/llvm-cov/llvm-cov.cpp -llvm/tools/llvm-cov/RenderingSupport.h -llvm/tools/llvm-cov/SourceCoverageViewHTML.h -llvm/tools/llvm-cov/SourceCoverageViewText.h -llvm/tools/llvm-cov/TestingSupport.cpp -llvm/tools/llvm-cxxdump/Error.cpp -llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp -llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp -llvm/tools/llvm-dis-fuzzer/llvm-dis-fuzzer.cpp -llvm/tools/llvm-dlang-demangle-fuzzer/DummyDemanglerFuzzer.cpp -llvm/tools/llvm-dlang-demangle-fuzzer/llvm-dlang-demangle-fuzzer.cpp -llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp -llvm/tools/llvm-dwarfdump/llvm-dwarfdump.h -llvm/tools/llvm-dwarfdump/SectionSizes.cpp -llvm/tools/llvm-exegesis/lib/Analysis.h -llvm/tools/llvm-exegesis/lib/Assembler.h -llvm/tools/llvm-exegesis/lib/BenchmarkCode.h -llvm/tools/llvm-exegesis/lib/BenchmarkResult.h -llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp -llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h -llvm/tools/llvm-exegesis/lib/Clustering.h -llvm/tools/llvm-exegesis/lib/CodeTemplate.h -llvm/tools/llvm-exegesis/lib/Error.cpp -llvm/tools/llvm-exegesis/lib/Error.h -llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp -llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h -llvm/tools/llvm-exegesis/lib/MCInstrDescView.h -llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.cpp -llvm/tools/llvm-exegesis/lib/ParallelSnippetGenerator.h -llvm/tools/llvm-exegesis/lib/PerfHelper.h -llvm/tools/llvm-exegesis/lib/RegisterAliasing.cpp -llvm/tools/llvm-exegesis/lib/RegisterAliasing.h -llvm/tools/llvm-exegesis/lib/RegisterValue.cpp -llvm/tools/llvm-exegesis/lib/RegisterValue.h -llvm/tools/llvm-exegesis/lib/SchedClassResolution.cpp -llvm/tools/llvm-exegesis/lib/SchedClassResolution.h -llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.h -llvm/tools/llvm-exegesis/lib/SnippetFile.cpp -llvm/tools/llvm-exegesis/lib/SnippetFile.h -llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp -llvm/tools/llvm-exegesis/lib/SnippetGenerator.h -llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp -llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h -llvm/tools/llvm-exegesis/lib/Target.h -llvm/tools/llvm-exegesis/lib/TargetSelect.h -llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.cpp -llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h -llvm/tools/llvm-exegesis/lib/AArch64/Target.cpp -llvm/tools/llvm-exegesis/lib/PowerPC/Target.cpp -llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp -llvm/tools/llvm-exegesis/lib/X86/X86Counter.h -llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp -llvm/tools/llvm-ifs/ErrorCollector.cpp -llvm/tools/llvm-ifs/ErrorCollector.h -llvm/tools/llvm-isel-fuzzer/DummyISelFuzzer.cpp -llvm/tools/llvm-itanium-demangle-fuzzer/DummyDemanglerFuzzer.cpp -llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp -llvm/tools/llvm-jitlink/llvm-jitlink.h -llvm/tools/llvm-jitlink/llvm-jitlink-executor/llvm-jitlink-executor.cpp -llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp -llvm/tools/llvm-link/llvm-link.cpp -llvm/tools/llvm-mc/Disassembler.h -llvm/tools/llvm-mca/CodeRegion.cpp -llvm/tools/llvm-mca/CodeRegion.h -llvm/tools/llvm-mca/CodeRegionGenerator.cpp -llvm/tools/llvm-mca/CodeRegionGenerator.h -llvm/tools/llvm-mca/llvm-mca.cpp -llvm/tools/llvm-mca/PipelinePrinter.cpp -llvm/tools/llvm-mca/PipelinePrinter.h -llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp -llvm/tools/llvm-mca/Views/DispatchStatistics.cpp -llvm/tools/llvm-mca/Views/DispatchStatistics.h -llvm/tools/llvm-mca/Views/InstructionInfoView.cpp -llvm/tools/llvm-mca/Views/InstructionInfoView.h -llvm/tools/llvm-mca/Views/InstructionView.cpp -llvm/tools/llvm-mca/Views/InstructionView.h -llvm/tools/llvm-mca/Views/RegisterFileStatistics.cpp -llvm/tools/llvm-mca/Views/RegisterFileStatistics.h -llvm/tools/llvm-mca/Views/ResourcePressureView.cpp -llvm/tools/llvm-mca/Views/ResourcePressureView.h -llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.cpp -llvm/tools/llvm-mca/Views/RetireControlUnitStatistics.h -llvm/tools/llvm-mca/Views/SchedulerStatistics.cpp -llvm/tools/llvm-mca/Views/SchedulerStatistics.h -llvm/tools/llvm-mca/Views/SummaryView.cpp -llvm/tools/llvm-mca/Views/SummaryView.h -llvm/tools/llvm-mca/Views/TimelineView.cpp -llvm/tools/llvm-mca/Views/TimelineView.h -llvm/tools/llvm-microsoft-demangle-fuzzer/DummyDemanglerFuzzer.cpp -llvm/tools/llvm-microsoft-demangle-fuzzer/llvm-microsoft-demangle-fuzzer.cpp -llvm/tools/llvm-ml/Disassembler.h -llvm/tools/llvm-modextract/llvm-modextract.cpp -llvm/tools/llvm-objcopy/llvm-objcopy.cpp -llvm/tools/llvm-objcopy/ObjcopyOptions.h -llvm/tools/llvm-objdump/COFFDump.h -llvm/tools/llvm-objdump/ELFDump.h -llvm/tools/llvm-objdump/MachODump.h -llvm/tools/llvm-objdump/ObjdumpOptID.h -llvm/tools/llvm-objdump/SourcePrinter.cpp -llvm/tools/llvm-objdump/SourcePrinter.h -llvm/tools/llvm-objdump/WasmDump.cpp -llvm/tools/llvm-objdump/WasmDump.h -llvm/tools/llvm-objdump/XCOFFDump.cpp -llvm/tools/llvm-objdump/XCOFFDump.h -llvm/tools/llvm-pdbutil/BytesOutputStyle.cpp -llvm/tools/llvm-pdbutil/BytesOutputStyle.h -llvm/tools/llvm-pdbutil/DumpOutputStyle.h -llvm/tools/llvm-pdbutil/ExplainOutputStyle.h -llvm/tools/llvm-pdbutil/FormatUtil.cpp -llvm/tools/llvm-pdbutil/InputFile.h -llvm/tools/llvm-pdbutil/MinimalSymbolDumper.h -llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp -llvm/tools/llvm-pdbutil/MinimalTypeDumper.h -llvm/tools/llvm-pdbutil/PrettyBuiltinDumper.cpp -llvm/tools/llvm-pdbutil/PrettyEnumDumper.cpp -llvm/tools/llvm-pdbutil/PrettyExternalSymbolDumper.cpp -llvm/tools/llvm-pdbutil/PrettyTypeDumper.cpp -llvm/tools/llvm-pdbutil/TypeReferenceTracker.h -llvm/tools/llvm-pdbutil/YAMLOutputStyle.h -llvm/tools/llvm-profgen/CallContext.h -llvm/tools/llvm-profgen/CSPreInliner.cpp -llvm/tools/llvm-profgen/CSPreInliner.h -llvm/tools/llvm-profgen/llvm-profgen.cpp -llvm/tools/llvm-profgen/PerfReader.cpp -llvm/tools/llvm-profgen/PerfReader.h -llvm/tools/llvm-rc/ResourceScriptCppFilter.cpp -llvm/tools/llvm-rc/ResourceScriptCppFilter.h -llvm/tools/llvm-rc/ResourceScriptParser.h -llvm/tools/llvm-rc/ResourceScriptStmt.cpp -llvm/tools/llvm-rc/ResourceScriptToken.h -llvm/tools/llvm-rc/ResourceVisitor.h -llvm/tools/llvm-readobj/ObjDumper.cpp -llvm/tools/llvm-readobj/WindowsResourceDumper.cpp -llvm/tools/llvm-readobj/WindowsResourceDumper.h -llvm/tools/llvm-reduce/DeltaManager.cpp -llvm/tools/llvm-reduce/DeltaManager.h -llvm/tools/llvm-reduce/ReducerWorkItem.cpp -llvm/tools/llvm-reduce/ReducerWorkItem.h -llvm/tools/llvm-reduce/TestRunner.cpp -llvm/tools/llvm-reduce/TestRunner.h -llvm/tools/llvm-reduce/deltas/Delta.cpp -llvm/tools/llvm-reduce/deltas/Delta.h -llvm/tools/llvm-reduce/deltas/ReduceAliases.cpp -llvm/tools/llvm-reduce/deltas/ReduceAliases.h -llvm/tools/llvm-reduce/deltas/ReduceArguments.h -llvm/tools/llvm-reduce/deltas/ReduceAttributes.cpp -llvm/tools/llvm-reduce/deltas/ReduceAttributes.h -llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.cpp -llvm/tools/llvm-reduce/deltas/ReduceBasicBlocks.h -llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.cpp -llvm/tools/llvm-reduce/deltas/ReduceFunctionBodies.h -llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp -llvm/tools/llvm-reduce/deltas/ReduceFunctions.h -llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.cpp -llvm/tools/llvm-reduce/deltas/ReduceGlobalObjects.h -llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.cpp -llvm/tools/llvm-reduce/deltas/ReduceGlobalValues.h -llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.cpp -llvm/tools/llvm-reduce/deltas/ReduceGlobalVarInitializers.h -llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.cpp -llvm/tools/llvm-reduce/deltas/ReduceGlobalVars.h -llvm/tools/llvm-reduce/deltas/ReduceInstructions.cpp -llvm/tools/llvm-reduce/deltas/ReduceInstructions.h -llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.cpp -llvm/tools/llvm-reduce/deltas/ReduceInstructionsMIR.h -llvm/tools/llvm-reduce/deltas/ReduceMetadata.cpp -llvm/tools/llvm-reduce/deltas/ReduceMetadata.h -llvm/tools/llvm-reduce/deltas/ReduceModuleData.cpp -llvm/tools/llvm-reduce/deltas/ReduceModuleData.h -llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.cpp -llvm/tools/llvm-reduce/deltas/ReduceOperandBundles.h -llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp -llvm/tools/llvm-reduce/deltas/ReduceOperands.h -llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.cpp -llvm/tools/llvm-reduce/deltas/ReduceOperandsSkip.h -llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp -llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h -llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp -llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h -llvm/tools/llvm-rust-demangle-fuzzer/DummyDemanglerFuzzer.cpp -llvm/tools/llvm-rust-demangle-fuzzer/llvm-rust-demangle-fuzzer.cpp -llvm/tools/llvm-shlib/libllvm.cpp -llvm/tools/llvm-special-case-list-fuzzer/DummySpecialCaseListFuzzer.cpp -llvm/tools/llvm-special-case-list-fuzzer/special-case-list-fuzzer.cpp -llvm/tools/llvm-strings/llvm-strings.cpp -llvm/tools/llvm-tapi-diff/DiffEngine.cpp -llvm/tools/llvm-tapi-diff/DiffEngine.h -llvm/tools/llvm-tapi-diff/llvm-tapi-diff.cpp -llvm/tools/llvm-undname/llvm-undname.cpp -llvm/tools/llvm-xray/func-id-helper.cpp -llvm/tools/llvm-xray/func-id-helper.h -llvm/tools/llvm-xray/llvm-xray.cpp -llvm/tools/llvm-xray/trie-node.h -llvm/tools/llvm-xray/xray-account.h -llvm/tools/llvm-xray/xray-color-helper.cpp -llvm/tools/llvm-xray/xray-color-helper.h -llvm/tools/llvm-xray/xray-converter.cpp -llvm/tools/llvm-xray/xray-converter.h -llvm/tools/llvm-xray/xray-fdr-dump.cpp -llvm/tools/llvm-xray/xray-graph-diff.cpp -llvm/tools/llvm-xray/xray-graph.h -llvm/tools/llvm-xray/xray-registry.cpp -llvm/tools/llvm-xray/xray-registry.h -llvm/tools/llvm-xray/xray-stacks.cpp -llvm/tools/llvm-yaml-numeric-parser-fuzzer/DummyYAMLNumericParserFuzzer.cpp -llvm/tools/llvm-yaml-numeric-parser-fuzzer/yaml-numeric-parser-fuzzer.cpp -llvm/tools/llvm-yaml-parser-fuzzer/DummyYAMLParserFuzzer.cpp -llvm/tools/llvm-yaml-parser-fuzzer/yaml-parser-fuzzer.cpp -llvm/tools/lto/LTODisassembler.cpp -llvm/tools/obj2yaml/archive2yaml.cpp -llvm/tools/obj2yaml/dwarf2yaml.cpp -llvm/tools/obj2yaml/minidump2yaml.cpp -llvm/tools/obj2yaml/obj2yaml.cpp -llvm/tools/obj2yaml/xcoff2yaml.cpp -llvm/tools/opt/NewPMDriver.cpp -llvm/tools/opt/PassPrinters.cpp -llvm/tools/opt/PassPrinters.h -llvm/tools/sanstats/sanstats.cpp -llvm/tools/vfabi-demangle-fuzzer/vfabi-demangler-fuzzer.cpp -llvm/tools/yaml2obj/yaml2obj.cpp -llvm/unittests/ADT/AnyTest.cpp -llvm/unittests/ADT/APSIntTest.cpp -llvm/unittests/ADT/BitFieldsTest.cpp -llvm/unittests/ADT/BreadthFirstIteratorTest.cpp -llvm/unittests/ADT/BumpPtrListTest.cpp -llvm/unittests/ADT/CombinationGeneratorTest.cpp -llvm/unittests/ADT/DirectedGraphTest.cpp -llvm/unittests/ADT/EnumeratedArrayTest.cpp -llvm/unittests/ADT/FallibleIteratorTest.cpp -llvm/unittests/ADT/FunctionRefTest.cpp -llvm/unittests/ADT/IListBaseTest.cpp -llvm/unittests/ADT/IListNodeBaseTest.cpp -llvm/unittests/ADT/IListNodeTest.cpp -llvm/unittests/ADT/ImmutableListTest.cpp -llvm/unittests/ADT/IntEqClassesTest.cpp -llvm/unittests/ADT/PointerEmbeddedIntTest.cpp -llvm/unittests/ADT/ScopeExitTest.cpp -llvm/unittests/ADT/SequenceTest.cpp -llvm/unittests/ADT/SetVectorTest.cpp -llvm/unittests/ADT/SmallSetTest.cpp -llvm/unittests/ADT/SparseMultiSetTest.cpp -llvm/unittests/ADT/SparseSetTest.cpp -llvm/unittests/ADT/StatisticTest.cpp -llvm/unittests/ADT/STLForwardCompatTest.cpp -llvm/unittests/ADT/StringExtrasTest.cpp -llvm/unittests/ADT/StringSetTest.cpp -llvm/unittests/ADT/StringSwitchTest.cpp -llvm/unittests/ADT/TypeSwitchTest.cpp -llvm/unittests/ADT/TypeTraitsTest.cpp -llvm/unittests/Analysis/BasicAliasAnalysisTest.cpp -llvm/unittests/Analysis/BlockFrequencyInfoTest.cpp -llvm/unittests/Analysis/BranchProbabilityInfoTest.cpp -llvm/unittests/Analysis/ConstraintSystemTest.cpp -llvm/unittests/Analysis/DDGTest.cpp -llvm/unittests/Analysis/DomTreeUpdaterTest.cpp -llvm/unittests/Analysis/FunctionPropertiesAnalysisTest.cpp -llvm/unittests/Analysis/GlobalsModRefTest.cpp -llvm/unittests/Analysis/InlineCostTest.cpp -llvm/unittests/Analysis/MLModelRunnerTest.cpp -llvm/unittests/Analysis/SparsePropagation.cpp -llvm/unittests/Analysis/TargetLibraryInfoTest.cpp -llvm/unittests/Analysis/ValueLatticeTest.cpp -llvm/unittests/AsmParser/AsmParserTest.cpp -llvm/unittests/BinaryFormat/DwarfTest.cpp -llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp -llvm/unittests/BinaryFormat/MsgPackReaderTest.cpp -llvm/unittests/BinaryFormat/MsgPackWriterTest.cpp -llvm/unittests/BinaryFormat/TestFileMagic.cpp -llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp -llvm/unittests/Bitstream/BitstreamWriterTest.cpp -llvm/unittests/CodeGen/AllocationOrderTest.cpp -llvm/unittests/CodeGen/AMDGPUMetadataTest.cpp -llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp -llvm/unittests/CodeGen/DIETest.cpp -llvm/unittests/CodeGen/LexicalScopesTest.cpp -llvm/unittests/CodeGen/MachineInstrBundleIteratorTest.cpp -llvm/unittests/CodeGen/RegAllocScoreTest.cpp -llvm/unittests/CodeGen/SelectionDAGAddressAnalysisTest.cpp -llvm/unittests/CodeGen/TestAsmPrinter.cpp -llvm/unittests/CodeGen/TestAsmPrinter.h -llvm/unittests/CodeGen/GlobalISel/CSETest.cpp -llvm/unittests/CodeGen/GlobalISel/KnownBitsVectorTest.cpp -llvm/unittests/DebugInfo/CodeView/GUIDFormatTest.cpp -llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFAcceleratorTableTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFDataExtractorTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp -llvm/unittests/DebugInfo/DWARF/DwarfGenerator.h -llvm/unittests/DebugInfo/DWARF/DWARFListTableTest.cpp -llvm/unittests/DebugInfo/DWARF/DWARFLocationExpressionTest.cpp -llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp -llvm/unittests/DebugInfo/DWARF/DwarfUtils.h -llvm/unittests/DebugInfo/MSF/MSFBuilderTest.cpp -llvm/unittests/DebugInfo/MSF/MSFCommonTest.cpp -llvm/unittests/DebugInfo/PDB/NativeSessionTest.cpp -llvm/unittests/DebugInfo/PDB/NativeSymbolReuseTest.cpp -llvm/unittests/DebugInfo/PDB/StringTableBuilderTest.cpp -llvm/unittests/DebugInfo/PDB/Inputs/SimpleTest.cpp -llvm/unittests/Debuginfod/DebuginfodTests.cpp -llvm/unittests/Debuginfod/HTTPClientTests.cpp -llvm/unittests/Demangle/DemangleTest.cpp -llvm/unittests/Demangle/DLangDemangleTest.cpp -llvm/unittests/Demangle/OutputBufferTest.cpp -llvm/unittests/Demangle/RustDemangleTest.cpp -llvm/unittests/Demangle/StringViewTest.cpp -llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp -llvm/unittests/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManagerTest.cpp -llvm/unittests/ExecutionEngine/Orc/EPCGenericMemoryAccessTest.cpp -llvm/unittests/ExecutionEngine/Orc/ExecutionSessionWrapperFunctionCallsTest.cpp -llvm/unittests/ExecutionEngine/Orc/ExecutorAddressTest.cpp -llvm/unittests/ExecutionEngine/Orc/JITTargetMachineBuilderTest.cpp -llvm/unittests/ExecutionEngine/Orc/LazyCallThroughAndReexportsTest.cpp -llvm/unittests/ExecutionEngine/Orc/LookupAndRecordAddrsTest.cpp -llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp -llvm/unittests/ExecutionEngine/Orc/OrcCAPITest.cpp -llvm/unittests/ExecutionEngine/Orc/ResourceTrackerTest.cpp -llvm/unittests/ExecutionEngine/Orc/SimpleExecutorMemoryManagerTest.cpp -llvm/unittests/ExecutionEngine/Orc/SimplePackedSerializationTest.cpp -llvm/unittests/ExecutionEngine/Orc/TaskDispatchTest.cpp -llvm/unittests/ExecutionEngine/Orc/ThreadSafeModuleTest.cpp -llvm/unittests/Frontend/OpenACCTest.cpp -llvm/unittests/Frontend/OpenMPContextTest.cpp -llvm/unittests/Frontend/OpenMPParsingTest.cpp -llvm/unittests/InterfaceStub/ELFYAMLTest.cpp -llvm/unittests/IR/DemandedBitsTest.cpp -llvm/unittests/IR/ManglerTest.cpp -llvm/unittests/IR/ModuleTest.cpp -llvm/unittests/IR/TimePassesTest.cpp -llvm/unittests/IR/UseTest.cpp -llvm/unittests/IR/VectorTypesTest.cpp -llvm/unittests/MC/Disassembler.cpp -llvm/unittests/MC/DwarfLineTableHeaders.cpp -llvm/unittests/MC/MCInstPrinter.cpp -llvm/unittests/MC/TargetRegistry.cpp -llvm/unittests/MC/AMDGPU/DwarfRegMappings.cpp -llvm/unittests/MC/SystemZ/SystemZAsmLexerTest.cpp -llvm/unittests/ObjCopy/ObjCopyTest.cpp -llvm/unittests/Object/ArchiveTest.cpp -llvm/unittests/Object/ELFObjectFileTest.cpp -llvm/unittests/Object/ELFTest.cpp -llvm/unittests/Object/ELFTypesTest.cpp -llvm/unittests/Object/MinidumpTest.cpp -llvm/unittests/Object/ObjectFileTest.cpp -llvm/unittests/Object/SymbolSizeTest.cpp -llvm/unittests/ObjectYAML/DWARFYAMLTest.cpp -llvm/unittests/ObjectYAML/ELFYAMLTest.cpp -llvm/unittests/ObjectYAML/MinidumpYAMLTest.cpp -llvm/unittests/Option/OptionMarshallingTest.cpp -llvm/unittests/Passes/DoublerPlugin.cpp -llvm/unittests/Passes/PassBuilderBindingsTest.cpp -llvm/unittests/Passes/PluginsTest.cpp -llvm/unittests/Passes/TestPlugin.cpp -llvm/unittests/Passes/TestPlugin.h -llvm/unittests/ProfileData/InstrProfDataTest.cpp -llvm/unittests/ProfileData/MemProfTest.cpp -llvm/unittests/ProfileData/SymbolRemappingReaderTest.cpp -llvm/unittests/Remarks/BitstreamRemarksFormatTest.cpp -llvm/unittests/Remarks/BitstreamRemarksParsingTest.cpp -llvm/unittests/Remarks/RemarksLinkingTest.cpp -llvm/unittests/Remarks/RemarksStrTabParsingTest.cpp -llvm/unittests/Remarks/YAMLRemarksSerializerTest.cpp -llvm/unittests/Support/AnnotationsTest.cpp -llvm/unittests/Support/ArrayRecyclerTest.cpp -llvm/unittests/Support/Base64Test.cpp -llvm/unittests/Support/buffer_ostream_test.cpp -llvm/unittests/Support/Chrono.cpp -llvm/unittests/Support/CSKYAttributeParserTest.cpp -llvm/unittests/Support/CSKYTargetParserTest.cpp -llvm/unittests/Support/DebugCounterTest.cpp -llvm/unittests/Support/DJBTest.cpp -llvm/unittests/Support/ELFAttributeParserTest.cpp -llvm/unittests/Support/ExtensibleRTTITest.cpp -llvm/unittests/Support/FileOutputBufferTest.cpp -llvm/unittests/Support/FSUniqueIDTest.cpp -llvm/unittests/Support/HashBuilderTest.cpp -llvm/unittests/Support/IndexedAccessorTest.cpp -llvm/unittests/Support/InstructionCostTest.cpp -llvm/unittests/Support/KnownBitsTest.h -llvm/unittests/Support/MatchersTest.cpp -llvm/unittests/Support/MemoryBufferRefTest.cpp -llvm/unittests/Support/ParallelTest.cpp -llvm/unittests/Support/raw_fd_stream_test.cpp -llvm/unittests/Support/raw_sha1_ostream_test.cpp -llvm/unittests/Support/RISCVAttributeParserTest.cpp -llvm/unittests/Support/ScaledNumberTest.cpp -llvm/unittests/Support/ScopedPrinterTest.cpp -llvm/unittests/Support/SHA256.cpp -llvm/unittests/Support/SuffixTreeTest.cpp -llvm/unittests/Support/TarWriterTest.cpp -llvm/unittests/Support/ToolOutputFileTest.cpp -llvm/unittests/Support/TypeTraitsTest.cpp -llvm/unittests/Support/UnicodeTest.cpp -llvm/unittests/Support/VersionTupleTest.cpp -llvm/unittests/Support/WithColorTest.cpp -llvm/unittests/Support/xxhashTest.cpp -llvm/unittests/Support/CommandLineInit/CommandLineInitTest.cpp -llvm/unittests/TableGen/ParserEntryPointTest.cpp -llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp -llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp -llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp -llvm/unittests/Target/ARM/InstSizes.cpp -llvm/unittests/Target/PowerPC/AIXRelocModelTest.cpp -llvm/unittests/Testing/Support/TempPathTest.cpp -llvm/unittests/TextAPI/TextStubHelpers.h -llvm/unittests/TextAPI/TextStubV1Tests.cpp -llvm/unittests/TextAPI/TextStubV2Tests.cpp -llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp -llvm/unittests/tools/llvm-exegesis/BenchmarkRunnerTest.cpp -llvm/unittests/tools/llvm-exegesis/PerfHelperTest.cpp -llvm/unittests/tools/llvm-exegesis/RegisterValueTest.cpp -llvm/unittests/tools/llvm-exegesis/AArch64/TargetTest.cpp -llvm/unittests/tools/llvm-exegesis/ARM/AssemblerTest.cpp -llvm/unittests/tools/llvm-exegesis/Common/AssemblerUtils.h -llvm/unittests/tools/llvm-exegesis/Mips/RegisterAliasingTest.cpp -llvm/unittests/tools/llvm-exegesis/Mips/TargetTest.cpp -llvm/unittests/tools/llvm-exegesis/Mips/TestBase.h -llvm/unittests/tools/llvm-exegesis/PowerPC/TestBase.h -llvm/unittests/tools/llvm-exegesis/X86/AssemblerTest.cpp -llvm/unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp -llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp -llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp -llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp -llvm/unittests/tools/llvm-exegesis/X86/TestBase.h -llvm/unittests/Transforms/IPO/AttributorTest.cpp -llvm/unittests/Transforms/IPO/AttributorTestBase.h -llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp -llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp -llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp -llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp -llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp -llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp -llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp -llvm/unittests/Transforms/Utils/VFABIUtils.cpp -llvm/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanLoopInfoTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanPredicatorTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanTest.cpp -llvm/unittests/Transforms/Vectorize/VPlanTestBase.h -llvm/unittests/XRay/FDRBlockIndexerTest.cpp -llvm/unittests/XRay/FDRBlockVerifierTest.cpp -llvm/unittests/XRay/FDRProducerConsumerTest.cpp -llvm/unittests/XRay/FDRRecordPrinterTest.cpp -llvm/unittests/XRay/FDRRecordsTest.cpp -llvm/unittests/XRay/FDRTraceWriterTest.cpp -llvm/unittests/XRay/ProfileTest.cpp -llvm/utils/not/not.cpp -llvm/Utils/TableGen/CodeGenInstAlias.cpp -llvm/Utils/TableGen/CodeGenInstAlias.h -llvm/utils/TableGen/CodeBeadsGen.cpp -llvm/utils/TableGen/CompressInstEmitter.cpp -llvm/utils/TableGen/DFAEmitter.h -llvm/utils/TableGen/DirectiveEmitter.cpp -llvm/utils/TableGen/ExegesisEmitter.cpp -llvm/utils/TableGen/OptEmitter.cpp -llvm/utils/TableGen/OptEmitter.h -llvm/utils/TableGen/OptRSTEmitter.cpp -llvm/utils/TableGen/PredicateExpander.h -llvm/utils/TableGen/SDNodeProperties.cpp -llvm/utils/TableGen/VarLenCodeEmitterGen.cpp -llvm/utils/TableGen/VarLenCodeEmitterGen.h -llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h -llvm/utils/TableGen/GlobalISel/CodeExpander.cpp -llvm/utils/TableGen/GlobalISel/CodeExpander.h -llvm/utils/TableGen/GlobalISel/CodeExpansions.h -llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp -llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp -llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h -llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp -llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h -llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp -llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h -mlir/examples/standalone/include/Standalone/StandaloneDialect.h -mlir/examples/standalone/include/Standalone/StandaloneOps.h -mlir/examples/standalone/include/Standalone-c/Dialects.h -mlir/examples/standalone/lib/CAPI/Dialects.cpp -mlir/examples/standalone/lib/Standalone/StandaloneDialect.cpp -mlir/examples/standalone/lib/Standalone/StandaloneOps.cpp -mlir/examples/standalone/python/StandaloneExtension.cpp -mlir/examples/standalone/standalone-opt/standalone-opt.cpp -mlir/examples/standalone/standalone-translate/standalone-translate.cpp -mlir/examples/toy/Ch1/toyc.cpp -mlir/examples/toy/Ch1/include/toy/AST.h -mlir/examples/toy/Ch1/include/toy/Lexer.h -mlir/examples/toy/Ch1/include/toy/Parser.h -mlir/examples/toy/Ch2/toyc.cpp -mlir/examples/toy/Ch2/include/toy/AST.h -mlir/examples/toy/Ch2/include/toy/Dialect.h -mlir/examples/toy/Ch2/include/toy/Lexer.h -mlir/examples/toy/Ch2/include/toy/MLIRGen.h -mlir/examples/toy/Ch2/include/toy/Parser.h -mlir/examples/toy/Ch2/mlir/Dialect.cpp -mlir/examples/toy/Ch2/mlir/MLIRGen.cpp -mlir/examples/toy/Ch3/toyc.cpp -mlir/examples/toy/Ch3/include/toy/AST.h -mlir/examples/toy/Ch3/include/toy/Dialect.h -mlir/examples/toy/Ch3/include/toy/Lexer.h -mlir/examples/toy/Ch3/include/toy/MLIRGen.h -mlir/examples/toy/Ch3/include/toy/Parser.h -mlir/examples/toy/Ch3/mlir/Dialect.cpp -mlir/examples/toy/Ch3/mlir/MLIRGen.cpp -mlir/examples/toy/Ch3/mlir/ToyCombine.cpp -mlir/examples/toy/Ch4/toyc.cpp -mlir/examples/toy/Ch4/include/toy/AST.h -mlir/examples/toy/Ch4/include/toy/Dialect.h -mlir/examples/toy/Ch4/include/toy/Lexer.h -mlir/examples/toy/Ch4/include/toy/MLIRGen.h -mlir/examples/toy/Ch4/include/toy/Parser.h -mlir/examples/toy/Ch4/include/toy/Passes.h -mlir/examples/toy/Ch4/include/toy/ShapeInferenceInterface.h -mlir/examples/toy/Ch4/mlir/Dialect.cpp -mlir/examples/toy/Ch4/mlir/MLIRGen.cpp -mlir/examples/toy/Ch4/mlir/ShapeInferencePass.cpp -mlir/examples/toy/Ch4/mlir/ToyCombine.cpp -mlir/examples/toy/Ch5/toyc.cpp -mlir/examples/toy/Ch5/include/toy/AST.h -mlir/examples/toy/Ch5/include/toy/Dialect.h -mlir/examples/toy/Ch5/include/toy/Lexer.h -mlir/examples/toy/Ch5/include/toy/MLIRGen.h -mlir/examples/toy/Ch5/include/toy/Parser.h -mlir/examples/toy/Ch5/include/toy/Passes.h -mlir/examples/toy/Ch5/include/toy/ShapeInferenceInterface.h -mlir/examples/toy/Ch5/mlir/Dialect.cpp -mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp -mlir/examples/toy/Ch5/mlir/MLIRGen.cpp -mlir/examples/toy/Ch5/mlir/ShapeInferencePass.cpp -mlir/examples/toy/Ch5/mlir/ToyCombine.cpp -mlir/examples/toy/Ch6/toyc.cpp -mlir/examples/toy/Ch6/include/toy/AST.h -mlir/examples/toy/Ch6/include/toy/Dialect.h -mlir/examples/toy/Ch6/include/toy/Lexer.h -mlir/examples/toy/Ch6/include/toy/MLIRGen.h -mlir/examples/toy/Ch6/include/toy/Parser.h -mlir/examples/toy/Ch6/include/toy/Passes.h -mlir/examples/toy/Ch6/include/toy/ShapeInferenceInterface.h -mlir/examples/toy/Ch6/mlir/Dialect.cpp -mlir/examples/toy/Ch6/mlir/LowerToAffineLoops.cpp -mlir/examples/toy/Ch6/mlir/LowerToLLVM.cpp -mlir/examples/toy/Ch6/mlir/MLIRGen.cpp -mlir/examples/toy/Ch6/mlir/ShapeInferencePass.cpp -mlir/examples/toy/Ch6/mlir/ToyCombine.cpp -mlir/examples/toy/Ch7/toyc.cpp -mlir/examples/toy/Ch7/include/toy/AST.h -mlir/examples/toy/Ch7/include/toy/Dialect.h -mlir/examples/toy/Ch7/include/toy/Lexer.h -mlir/examples/toy/Ch7/include/toy/MLIRGen.h -mlir/examples/toy/Ch7/include/toy/Parser.h -mlir/examples/toy/Ch7/include/toy/Passes.h -mlir/examples/toy/Ch7/include/toy/ShapeInferenceInterface.h -mlir/examples/toy/Ch7/mlir/Dialect.cpp -mlir/examples/toy/Ch7/mlir/LowerToAffineLoops.cpp -mlir/examples/toy/Ch7/mlir/LowerToLLVM.cpp -mlir/examples/toy/Ch7/mlir/MLIRGen.cpp -mlir/examples/toy/Ch7/mlir/ShapeInferencePass.cpp -mlir/examples/toy/Ch7/mlir/ToyCombine.cpp -mlir/include/mlir/InitAllDialects.h -mlir/include/mlir/InitAllPasses.h -mlir/include/mlir/InitAllTranslations.h -mlir/include/mlir/Parser/Parser.h -mlir/include/mlir/Translation.h -mlir/include/mlir/Analysis/BufferViewFlowAnalysis.h -mlir/include/mlir/Analysis/DataFlowAnalysis.h -mlir/include/mlir/Analysis/DataLayoutAnalysis.h -mlir/include/mlir/Analysis/Liveness.h -mlir/include/mlir/Analysis/SliceAnalysis.h -mlir/include/mlir/Analysis/AliasAnalysis/LocalAliasAnalysis.h -mlir/include/mlir/Analysis/Presburger/Fraction.h -mlir/include/mlir/Analysis/Presburger/IntegerRelation.h -mlir/include/mlir/Analysis/Presburger/LinearTransform.h -mlir/include/mlir/Analysis/Presburger/Matrix.h -mlir/include/mlir/Analysis/Presburger/PresburgerSet.h -mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h -mlir/include/mlir/Analysis/Presburger/PWMAFunction.h -mlir/include/mlir/Analysis/Presburger/Simplex.h -mlir/include/mlir/Analysis/Presburger/Utils.h -mlir/include/mlir/CAPI/AffineExpr.h -mlir/include/mlir/CAPI/AffineMap.h -mlir/include/mlir/CAPI/Diagnostics.h -mlir/include/mlir/CAPI/ExecutionEngine.h -mlir/include/mlir/CAPI/IntegerSet.h -mlir/include/mlir/CAPI/Interfaces.h -mlir/include/mlir/CAPI/IR.h -mlir/include/mlir/CAPI/Pass.h -mlir/include/mlir/CAPI/Registration.h -mlir/include/mlir/CAPI/Support.h -mlir/include/mlir/CAPI/Utils.h -mlir/include/mlir/CAPI/Wrap.h -mlir/include/mlir/Conversion/Passes.h -mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h -mlir/include/mlir/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.h -mlir/include/mlir/Conversion/ArithmeticToSPIRV/ArithmeticToSPIRV.h -mlir/include/mlir/Conversion/ArmNeon2dToIntr/ArmNeon2dToIntr.h -mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h -mlir/include/mlir/Conversion/BufferizationToMemRef/BufferizationToMemRef.h -mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h -mlir/include/mlir/Conversion/ComplexToStandard/ComplexToStandard.h -mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h -mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h -mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.h -mlir/include/mlir/Conversion/FuncToSPIRV/FuncToSPIRV.h -mlir/include/mlir/Conversion/FuncToSPIRV/FuncToSPIRVPass.h -mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h -mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h -mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h -mlir/include/mlir/Conversion/GPUToROCDL/Runtimes.h -mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h -mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRVPass.h -mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h -mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.h -mlir/include/mlir/Conversion/LinalgToStandard/LinalgToStandard.h -mlir/include/mlir/Conversion/LLVMCommon/ConversionTarget.h -mlir/include/mlir/Conversion/LLVMCommon/LoweringOptions.h -mlir/include/mlir/Conversion/LLVMCommon/MemRefBuilder.h -mlir/include/mlir/Conversion/LLVMCommon/Pattern.h -mlir/include/mlir/Conversion/LLVMCommon/StructBuilder.h -mlir/include/mlir/Conversion/LLVMCommon/TypeConverter.h -mlir/include/mlir/Conversion/LLVMCommon/VectorPattern.h -mlir/include/mlir/Conversion/MathToLibm/MathToLibm.h -mlir/include/mlir/Conversion/MathToLLVM/MathToLLVM.h -mlir/include/mlir/Conversion/MathToSPIRV/MathToSPIRV.h -mlir/include/mlir/Conversion/MathToSPIRV/MathToSPIRVPass.h -mlir/include/mlir/Conversion/MemRefToLLVM/AllocLikeConversion.h -mlir/include/mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h -mlir/include/mlir/Conversion/MemRefToSPIRV/MemRefToSPIRV.h -mlir/include/mlir/Conversion/MemRefToSPIRV/MemRefToSPIRVPass.h -mlir/include/mlir/Conversion/OpenACCToLLVM/ConvertOpenACCToLLVM.h -mlir/include/mlir/Conversion/OpenACCToSCF/ConvertOpenACCToSCF.h -mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h -mlir/include/mlir/Conversion/PDLToPDLInterp/PDLToPDLInterp.h -mlir/include/mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h -mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h -mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h -mlir/include/mlir/Conversion/SCFToGPU/SCFToGPUPass.h -mlir/include/mlir/Conversion/SCFToOpenMP/SCFToOpenMP.h -mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h -mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRVPass.h -mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h -mlir/include/mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h -mlir/include/mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h -mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h -mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h -mlir/include/mlir/Conversion/TensorToSPIRV/TensorToSPIRV.h -mlir/include/mlir/Conversion/TensorToSPIRV/TensorToSPIRVPass.h -mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h -mlir/include/mlir/Conversion/TosaToSCF/TosaToSCF.h -mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h -mlir/include/mlir/Conversion/VectorToGPU/VectorToGPU.h -mlir/include/mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h -mlir/include/mlir/Conversion/VectorToSCF/VectorToSCF.h -mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRV.h -mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h -mlir/include/mlir/Dialect/CommonFolders.h -mlir/include/mlir/Dialect/Traits.h -mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h -mlir/include/mlir/Dialect/Affine/LoopUtils.h -mlir/include/mlir/Dialect/Affine/Passes.h -mlir/include/mlir/Dialect/Affine/Utils.h -mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h -mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h -mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h -mlir/include/mlir/Dialect/Affine/Analysis/NestedMatcher.h -mlir/include/mlir/Dialect/Affine/Analysis/Utils.h -mlir/include/mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h -mlir/include/mlir/Dialect/Affine/IR/AffineOps.h -mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h -mlir/include/mlir/Dialect/AMX/AMXDialect.h -mlir/include/mlir/Dialect/AMX/Transforms.h -mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h -mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h -mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h -mlir/include/mlir/Dialect/Arithmetic/Utils/Utils.h -mlir/include/mlir/Dialect/ArmNeon/ArmNeonDialect.h -mlir/include/mlir/Dialect/ArmSVE/ArmSVEDialect.h -mlir/include/mlir/Dialect/ArmSVE/Transforms.h -mlir/include/mlir/Dialect/Async/Passes.h -mlir/include/mlir/Dialect/Async/Transforms.h -mlir/include/mlir/Dialect/Async/IR/Async.h -mlir/include/mlir/Dialect/Async/IR/AsyncTypes.h -mlir/include/mlir/Dialect/Bufferization/IR/AllocationOpInterface.h -mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h -mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h -mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h -mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h -mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h -mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h -mlir/include/mlir/Dialect/Complex/IR/Complex.h -mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h -mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h -mlir/include/mlir/Dialect/DLTI/DLTI.h -mlir/include/mlir/Dialect/DLTI/Traits.h -mlir/include/mlir/Dialect/EmitC/IR/EmitC.h -mlir/include/mlir/Dialect/Func/IR/FuncOps.h -mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h -mlir/include/mlir/Dialect/Func/Transforms/FuncConversions.h -mlir/include/mlir/Dialect/Func/Transforms/Passes.h -mlir/include/mlir/Dialect/GPU/GPUDialect.h -mlir/include/mlir/Dialect/GPU/MemoryPromotion.h -mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h -mlir/include/mlir/Dialect/GPU/Passes.h -mlir/include/mlir/Dialect/GPU/Utils.h -mlir/include/mlir/Dialect/Linalg/Passes.h -mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h -mlir/include/mlir/Dialect/Linalg/IR/Linalg.h -mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h -mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h -mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h -mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h -mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h -mlir/include/mlir/Dialect/Linalg/Utils/Utils.h -mlir/include/mlir/Dialect/LLVMIR/FunctionCallUtils.h -mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h -mlir/include/mlir/Dialect/LLVMIR/LLVMTypes.h -mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h -mlir/include/mlir/Dialect/LLVMIR/ROCDLDialect.h -mlir/include/mlir/Dialect/LLVMIR/Transforms/LegalizeForExport.h -mlir/include/mlir/Dialect/LLVMIR/Transforms/Passes.h -mlir/include/mlir/Dialect/Math/IR/Math.h -mlir/include/mlir/Dialect/Math/Transforms/Approximation.h -mlir/include/mlir/Dialect/Math/Transforms/Passes.h -mlir/include/mlir/Dialect/MemRef/IR/MemRef.h -mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h -mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h -mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h -mlir/include/mlir/Dialect/OpenACC/OpenACC.h -mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h -mlir/include/mlir/Dialect/PDL/IR/PDL.h -mlir/include/mlir/Dialect/PDL/IR/PDLOps.h -mlir/include/mlir/Dialect/PDL/IR/PDLTypes.h -mlir/include/mlir/Dialect/PDLInterp/IR/PDLInterp.h -mlir/include/mlir/Dialect/Quant/FakeQuantSupport.h -mlir/include/mlir/Dialect/Quant/Passes.h -mlir/include/mlir/Dialect/Quant/QuantizeUtils.h -mlir/include/mlir/Dialect/Quant/QuantOps.h -mlir/include/mlir/Dialect/Quant/QuantTypes.h -mlir/include/mlir/Dialect/Quant/UniformSupport.h -mlir/include/mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h -mlir/include/mlir/Dialect/SCF/Passes.h -mlir/include/mlir/Dialect/SCF/SCF.h -mlir/include/mlir/Dialect/SCF/Transforms.h -mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h -mlir/include/mlir/Dialect/SCF/Utils/Utils.h -mlir/include/mlir/Dialect/Shape/IR/Shape.h -mlir/include/mlir/Dialect/Shape/Transforms/Passes.h -mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h -mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h -mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h -mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h -mlir/include/mlir/Dialect/SPIRV/IR/ParserUtils.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVDialect.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVEnums.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOps.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVOpTraits.h -mlir/include/mlir/Dialect/SPIRV/IR/SPIRVTypes.h -mlir/include/mlir/Dialect/SPIRV/IR/TargetAndABI.h -mlir/include/mlir/Dialect/SPIRV/Linking/ModuleCombiner.h -mlir/include/mlir/Dialect/SPIRV/Transforms/Passes.h -mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h -mlir/include/mlir/Dialect/SPIRV/Utils/LayoutUtils.h -mlir/include/mlir/Dialect/Tensor/IR/Tensor.h -mlir/include/mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h -mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h -mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h -mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h -mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h -mlir/include/mlir/Dialect/Tensor/Utils/Utils.h -mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h -mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h -mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h -mlir/include/mlir/Dialect/Tosa/Utils/CoversionUtils.h -mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h -mlir/include/mlir/Dialect/Tosa/Utils/ShapeUtils.h -mlir/include/mlir/Dialect/Utils/IndexingUtils.h -mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h -mlir/include/mlir/Dialect/Utils/StaticValueUtils.h -mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h -mlir/include/mlir/Dialect/Vector/IR/VectorOps.h -mlir/include/mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h -mlir/include/mlir/Dialect/Vector/Transforms/Passes.h -mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h -mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h -mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h -mlir/include/mlir/Dialect/X86Vector/Transforms.h -mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h -mlir/include/mlir/ExecutionEngine/AsyncRuntime.h -mlir/include/mlir/ExecutionEngine/CRunnerUtils.h -mlir/include/mlir/ExecutionEngine/JitRunner.h -mlir/include/mlir/ExecutionEngine/MemRefUtils.h -mlir/include/mlir/ExecutionEngine/OptUtils.h -mlir/include/mlir/ExecutionEngine/RunnerUtils.h -mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h -mlir/include/mlir/Interfaces/CallInterfaces.h -mlir/include/mlir/Interfaces/CastInterfaces.h -mlir/include/mlir/Interfaces/ControlFlowInterfaces.h -mlir/include/mlir/Interfaces/CopyOpInterface.h -mlir/include/mlir/Interfaces/DataLayoutInterfaces.h -mlir/include/mlir/Interfaces/DecodeAttributesInterfaces.h -mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.h -mlir/include/mlir/Interfaces/FoldInterfaces.h -mlir/include/mlir/Interfaces/InferTypeOpInterface.h -mlir/include/mlir/Interfaces/LoopLikeInterface.h -mlir/include/mlir/Interfaces/TilingInterface.h -mlir/include/mlir/Interfaces/VectorInterfaces.h -mlir/include/mlir/Interfaces/ViewLikeInterface.h -mlir/include/mlir/IR/AffineExpr.h -mlir/include/mlir/IR/AffineMap.h -mlir/include/mlir/IR/AsmState.h -mlir/include/mlir/IR/AttributeSupport.h -mlir/include/mlir/IR/Block.h -mlir/include/mlir/IR/Builders.h -mlir/include/mlir/IR/BuiltinDialect.h -mlir/include/mlir/IR/BuiltinOps.h -mlir/include/mlir/IR/BuiltinTypes.h -mlir/include/mlir/IR/DialectImplementation.h -mlir/include/mlir/IR/Dominance.h -mlir/include/mlir/IR/FunctionImplementation.h -mlir/include/mlir/IR/FunctionInterfaces.h -mlir/include/mlir/IR/ImplicitLocOpBuilder.h -mlir/include/mlir/IR/Matchers.h -mlir/include/mlir/IR/MLIRContext.h -mlir/include/mlir/IR/OpDefinition.h -mlir/include/mlir/IR/OwningOpRef.h -mlir/include/mlir/IR/Region.h -mlir/include/mlir/IR/RegionKindInterface.h -mlir/include/mlir/IR/SubElementInterfaces.h -mlir/include/mlir/IR/SymbolTable.h -mlir/include/mlir/IR/TensorEncoding.h -mlir/include/mlir/IR/Threading.h -mlir/include/mlir/IR/TypeRange.h -mlir/include/mlir/IR/TypeUtilities.h -mlir/include/mlir/IR/Value.h -mlir/include/mlir/IR/Verifier.h -mlir/include/mlir/IR/Visitors.h -mlir/include/mlir/Parser/AsmParserState.h -mlir/include/mlir/Reducer/PassDetail.h -mlir/include/mlir/Reducer/Passes.h -mlir/include/mlir/Reducer/ReductionNode.h -mlir/include/mlir/Reducer/ReductionPatternInterface.h -mlir/include/mlir/Reducer/Tester.h -mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h -mlir/include/mlir/Rewrite/PatternApplicator.h -mlir/include/mlir/Support/DebugCounter.h -mlir/include/mlir/Support/DebugStringHelper.h -mlir/include/mlir/Support/FileUtilities.h -mlir/include/mlir/Support/IndentedOstream.h -mlir/include/mlir/Support/MathExtras.h -mlir/include/mlir/Support/MlirOptMain.h -mlir/include/mlir/Support/ThreadLocalCache.h -mlir/include/mlir/Support/Timing.h -mlir/include/mlir/Support/ToolUtilities.h -mlir/include/mlir/TableGen/Argument.h -mlir/include/mlir/TableGen/Attribute.h -mlir/include/mlir/TableGen/AttrOrTypeDef.h -mlir/include/mlir/TableGen/Builder.h -mlir/include/mlir/TableGen/Class.h -mlir/include/mlir/TableGen/Constraint.h -mlir/include/mlir/TableGen/Dialect.h -mlir/include/mlir/TableGen/GenInfo.h -mlir/include/mlir/TableGen/GenNameParser.h -mlir/include/mlir/TableGen/Interfaces.h -mlir/include/mlir/TableGen/Operator.h -mlir/include/mlir/TableGen/Pass.h -mlir/include/mlir/TableGen/Pattern.h -mlir/include/mlir/TableGen/Predicate.h -mlir/include/mlir/TableGen/Region.h -mlir/include/mlir/TableGen/SideEffects.h -mlir/include/mlir/TableGen/Successor.h -mlir/include/mlir/TableGen/Trait.h -mlir/include/mlir/TableGen/Type.h -mlir/include/mlir/Target/Cpp/CppEmitter.h -mlir/include/mlir/Target/LLVMIR/Export.h -mlir/include/mlir/Target/LLVMIR/Import.h -mlir/include/mlir/Target/LLVMIR/LLVMTranslationInterface.h -mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h -mlir/include/mlir/Target/LLVMIR/TypeFromLLVM.h -mlir/include/mlir/Target/LLVMIR/TypeToLLVM.h -mlir/include/mlir/Target/LLVMIR/Dialect/All.h -mlir/include/mlir/Target/LLVMIR/Dialect/AMX/AMXToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/ArmNeon/ArmNeonToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/ArmSVE/ArmSVEToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h -mlir/include/mlir/Target/LLVMIR/Dialect/X86Vector/X86VectorToLLVMIRTranslation.h -mlir/include/mlir/Target/SPIRV/Deserialization.h -mlir/include/mlir/Target/SPIRV/Serialization.h -mlir/include/mlir/Target/SPIRV/SPIRVBinaryUtils.h -mlir/include/mlir/Tools/mlir-lsp-server/MlirLspServerMain.h -mlir/include/mlir/Tools/mlir-reduce/MlirReduceMain.h -mlir/include/mlir/Tools/PDLL/AST/Context.h -mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h -mlir/include/mlir/Tools/PDLL/CodeGen/CPPGen.h -mlir/include/mlir/Tools/PDLL/CodeGen/MLIRGen.h -mlir/include/mlir/Tools/PDLL/ODS/Constraint.h -mlir/include/mlir/Tools/PDLL/ODS/Context.h -mlir/include/mlir/Tools/PDLL/ODS/Dialect.h -mlir/include/mlir/Tools/PDLL/ODS/Operation.h -mlir/include/mlir/Tools/PDLL/Parser/Parser.h -mlir/include/mlir/Transforms/ControlFlowSinkUtils.h -mlir/include/mlir/Transforms/DialectConversion.h -mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h -mlir/include/mlir/Transforms/InliningUtils.h -mlir/include/mlir/Transforms/LocationSnapshot.h -mlir/include/mlir/Transforms/Passes.h -mlir/include/mlir/Transforms/RegionUtils.h -mlir/include/mlir-c/AffineExpr.h -mlir/include/mlir-c/AffineMap.h -mlir/include/mlir-c/BuiltinAttributes.h -mlir/include/mlir-c/BuiltinTypes.h -mlir/include/mlir-c/Conversion.h -mlir/include/mlir-c/Debug.h -mlir/include/mlir-c/Diagnostics.h -mlir/include/mlir-c/ExecutionEngine.h -mlir/include/mlir-c/IntegerSet.h -mlir/include/mlir-c/Interfaces.h -mlir/include/mlir-c/IR.h -mlir/include/mlir-c/Pass.h -mlir/include/mlir-c/Registration.h -mlir/include/mlir-c/Support.h -mlir/include/mlir-c/Transforms.h -mlir/include/mlir-c/Bindings/Python/Interop.h -mlir/include/mlir-c/Dialect/Async.h -mlir/include/mlir-c/Dialect/Func.h -mlir/include/mlir-c/Dialect/GPU.h -mlir/include/mlir-c/Dialect/Linalg.h -mlir/include/mlir-c/Dialect/LLVM.h -mlir/include/mlir-c/Dialect/PDL.h -mlir/include/mlir-c/Dialect/Quant.h -mlir/include/mlir-c/Dialect/SCF.h -mlir/include/mlir-c/Dialect/Shape.h -mlir/include/mlir-c/Dialect/SparseTensor.h -mlir/include/mlir-c/Dialect/Tensor.h -mlir/lib/Analysis/AliasAnalysis.cpp -mlir/lib/Analysis/BufferViewFlowAnalysis.cpp -mlir/lib/Analysis/CallGraph.cpp -mlir/lib/Analysis/DataFlowAnalysis.cpp -mlir/lib/Analysis/DataLayoutAnalysis.cpp -mlir/lib/Analysis/Liveness.cpp -mlir/lib/Analysis/SliceAnalysis.cpp -mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp -mlir/lib/Analysis/Presburger/IntegerRelation.cpp -mlir/lib/Analysis/Presburger/LinearTransform.cpp -mlir/lib/Analysis/Presburger/Matrix.cpp -mlir/lib/Analysis/Presburger/PresburgerSet.cpp -mlir/lib/Analysis/Presburger/PresburgerSpace.cpp -mlir/lib/Analysis/Presburger/PWMAFunction.cpp -mlir/lib/Analysis/Presburger/Simplex.cpp -mlir/lib/Analysis/Presburger/Utils.cpp -mlir/lib/Bindings/Python/AllPassesRegistration.cpp -mlir/lib/Bindings/Python/AsyncPasses.cpp -mlir/lib/Bindings/Python/DialectLinalg.cpp -mlir/lib/Bindings/Python/DialectPDL.cpp -mlir/lib/Bindings/Python/DialectQuant.cpp -mlir/lib/Bindings/Python/DialectSparseTensor.cpp -mlir/lib/Bindings/Python/ExecutionEngineModule.cpp -mlir/lib/Bindings/Python/Globals.h -mlir/lib/Bindings/Python/GPUPasses.cpp -mlir/lib/Bindings/Python/IRAffine.cpp -mlir/lib/Bindings/Python/IRAttributes.cpp -mlir/lib/Bindings/Python/IRCore.cpp -mlir/lib/Bindings/Python/IRInterfaces.cpp -mlir/lib/Bindings/Python/IRModule.cpp -mlir/lib/Bindings/Python/IRModule.h -mlir/lib/Bindings/Python/IRTypes.cpp -mlir/lib/Bindings/Python/LinalgPasses.cpp -mlir/lib/Bindings/Python/MainModule.cpp -mlir/lib/Bindings/Python/Pass.cpp -mlir/lib/Bindings/Python/Pass.h -mlir/lib/Bindings/Python/PybindUtils.cpp -mlir/lib/Bindings/Python/PybindUtils.h -mlir/lib/Bindings/Python/SparseTensorPasses.cpp -mlir/lib/Bindings/Python/Conversions/Conversions.cpp -mlir/lib/Bindings/Python/Transforms/Transforms.cpp -mlir/lib/CAPI/Conversion/Passes.cpp -mlir/lib/CAPI/Debug/Debug.cpp -mlir/lib/CAPI/Dialect/Async.cpp -mlir/lib/CAPI/Dialect/AsyncPasses.cpp -mlir/lib/CAPI/Dialect/Func.cpp -mlir/lib/CAPI/Dialect/GPU.cpp -mlir/lib/CAPI/Dialect/GPUPasses.cpp -mlir/lib/CAPI/Dialect/Linalg.cpp -mlir/lib/CAPI/Dialect/LinalgPasses.cpp -mlir/lib/CAPI/Dialect/LLVM.cpp -mlir/lib/CAPI/Dialect/PDL.cpp -mlir/lib/CAPI/Dialect/Quant.cpp -mlir/lib/CAPI/Dialect/SCF.cpp -mlir/lib/CAPI/Dialect/Shape.cpp -mlir/lib/CAPI/Dialect/SparseTensor.cpp -mlir/lib/CAPI/Dialect/SparseTensorPasses.cpp -mlir/lib/CAPI/Dialect/Tensor.cpp -mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp -mlir/lib/CAPI/Interfaces/Interfaces.cpp -mlir/lib/CAPI/IR/AffineExpr.cpp -mlir/lib/CAPI/IR/AffineMap.cpp -mlir/lib/CAPI/IR/BuiltinAttributes.cpp -mlir/lib/CAPI/IR/BuiltinTypes.cpp -mlir/lib/CAPI/IR/Diagnostics.cpp -mlir/lib/CAPI/IR/DialectHandle.cpp -mlir/lib/CAPI/IR/IntegerSet.cpp -mlir/lib/CAPI/IR/IR.cpp -mlir/lib/CAPI/IR/Pass.cpp -mlir/lib/CAPI/IR/Support.cpp -mlir/lib/CAPI/Registration/Registration.cpp -mlir/lib/CAPI/Transforms/Passes.cpp -mlir/lib/Conversion/PassDetail.h -mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp -mlir/lib/Conversion/ArithmeticToLLVM/ArithmeticToLLVM.cpp -mlir/lib/Conversion/ArithmeticToSPIRV/ArithmeticToSPIRV.cpp -mlir/lib/Conversion/ArmNeon2dToIntr/ArmNeon2dToIntr.cpp -mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp -mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp -mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp -mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp -mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp -mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp -mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRV.cpp -mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp -mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp -mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h -mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h -mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h -mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp -mlir/lib/Conversion/GPUToNVVM/WmmaOpsToNvvm.cpp -mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp -mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp -mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp -mlir/lib/Conversion/GPUToVulkan/ConvertGPULaunchFuncToVulkanLaunchFunc.cpp -mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp -mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp -mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp -mlir/lib/Conversion/LLVMCommon/LoweringOptions.cpp -mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp -mlir/lib/Conversion/LLVMCommon/MemRefDescriptor.h -mlir/lib/Conversion/LLVMCommon/Pattern.cpp -mlir/lib/Conversion/LLVMCommon/StructBuilder.cpp -mlir/lib/Conversion/LLVMCommon/TypeConverter.cpp -mlir/lib/Conversion/LLVMCommon/VectorPattern.cpp -mlir/lib/Conversion/MathToLibm/MathToLibm.cpp -mlir/lib/Conversion/MathToLLVM/MathToLLVM.cpp -mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp -mlir/lib/Conversion/MathToSPIRV/MathToSPIRVPass.cpp -mlir/lib/Conversion/MemRefToLLVM/AllocLikeConversion.cpp -mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp -mlir/lib/Conversion/MemRefToSPIRV/MemRefToSPIRV.cpp -mlir/lib/Conversion/MemRefToSPIRV/MemRefToSPIRVPass.cpp -mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp -mlir/lib/Conversion/OpenACCToSCF/OpenACCToSCF.cpp -mlir/lib/Conversion/OpenMPToLLVM/OpenMPToLLVM.cpp -mlir/lib/Conversion/PDLToPDLInterp/PDLToPDLInterp.cpp -mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp -mlir/lib/Conversion/PDLToPDLInterp/Predicate.h -mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp -mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.h -mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp -mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h -mlir/lib/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.cpp -mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp -mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp -mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp -mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp -mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp -mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRVPass.cpp -mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp -mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp -mlir/lib/Conversion/SPIRVCommon/Pattern.h -mlir/lib/Conversion/SPIRVToLLVM/ConvertLaunchFuncToLLVMCalls.cpp -mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp -mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp -mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp -mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRV.cpp -mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp -mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp -mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp -mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp -mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp -mlir/lib/Conversion/TosaToSCF/TosaToSCF.cpp -mlir/lib/Conversion/TosaToSCF/TosaToSCFPass.cpp -mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp -mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp -mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp -mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp -mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp -mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRVPass.cpp -mlir/lib/Dialect/Traits.cpp -mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp -mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp -mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp -mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp -mlir/lib/Dialect/Affine/Analysis/Utils.cpp -mlir/lib/Dialect/Affine/IR/AffineMemoryOpInterfaces.cpp -mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp -mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp -mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp -mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp -mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp -mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp -mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp -mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp -mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp -mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp -mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp -mlir/lib/Dialect/Affine/Transforms/PassDetail.h -mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp -mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp -mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp -mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp -mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp -mlir/lib/Dialect/Affine/Utils/Utils.cpp -mlir/lib/Dialect/AMX/IR/AMXDialect.cpp -mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp -mlir/lib/Dialect/Arithmetic/IR/ArithmeticDialect.cpp -mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp -mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp -mlir/lib/Dialect/Arithmetic/Transforms/PassDetail.h -mlir/lib/Dialect/Arithmetic/Utils/Utils.cpp -mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp -mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp -mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp -mlir/lib/Dialect/Async/IR/Async.cpp -mlir/lib/Dialect/Async/Transforms/AsyncParallelFor.cpp -mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCounting.cpp -mlir/lib/Dialect/Async/Transforms/AsyncRuntimeRefCountingOpt.cpp -mlir/lib/Dialect/Async/Transforms/AsyncToAsyncRuntime.cpp -mlir/lib/Dialect/Async/Transforms/PassDetail.cpp -mlir/lib/Dialect/Async/Transforms/PassDetail.h -mlir/lib/Dialect/Bufferization/IR/AllocationOpInterface.cpp -mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp -mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp -mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp -mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp -mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp -mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp -mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp -mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp -mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp -mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h -mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp -mlir/lib/Dialect/Complex/IR/ComplexOps.cpp -mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp -mlir/lib/Dialect/DLTI/DLTI.cpp -mlir/lib/Dialect/DLTI/Traits.cpp -mlir/lib/Dialect/EmitC/IR/EmitC.cpp -mlir/lib/Dialect/Func/IR/FuncOps.cpp -mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp -mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp -mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp -mlir/lib/Dialect/Func/Transforms/PassDetail.h -mlir/lib/Dialect/GPU/IR/GPUDialect.cpp -mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp -mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp -mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp -mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp -mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp -mlir/lib/Dialect/GPU/Transforms/PassDetail.h -mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp -mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp -mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp -mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp -mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp -mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp -mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp -mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp -mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp -mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp -mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp -mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp -mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp -mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp -mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp -mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp -mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp -mlir/lib/Dialect/Linalg/Transforms/InlineScalarOperands.cpp -mlir/lib/Dialect/Linalg/Transforms/Interchange.cpp -mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp -mlir/lib/Dialect/Linalg/Transforms/Loops.cpp -mlir/lib/Dialect/Linalg/Transforms/NamedOpConversions.cpp -mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp -mlir/lib/Dialect/Linalg/Transforms/PassDetail.h -mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp -mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp -mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp -mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp -mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp -mlir/lib/Dialect/Linalg/Utils/Utils.cpp -mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp -mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp -mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp -mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp -mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h -mlir/lib/Dialect/LLVMIR/Transforms/LegalizeForExport.cpp -mlir/lib/Dialect/LLVMIR/Transforms/PassDetail.h -mlir/lib/Dialect/Math/IR/MathDialect.cpp -mlir/lib/Dialect/Math/IR/MathOps.cpp -mlir/lib/Dialect/Math/Transforms/AlgebraicSimplification.cpp -mlir/lib/Dialect/Math/Transforms/ExpandTanh.cpp -mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp -mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp -mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp -mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp -mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp -mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp -mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp -mlir/lib/Dialect/MemRef/Transforms/PassDetail.h -mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp -mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp -mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp -mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp -mlir/lib/Dialect/PDL/IR/PDL.cpp -mlir/lib/Dialect/PDL/IR/PDLTypes.cpp -mlir/lib/Dialect/PDLInterp/IR/PDLInterp.cpp -mlir/lib/Dialect/Quant/IR/QuantOps.cpp -mlir/lib/Dialect/Quant/IR/QuantTypes.cpp -mlir/lib/Dialect/Quant/IR/TypeDetail.h -mlir/lib/Dialect/Quant/IR/TypeParser.cpp -mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp -mlir/lib/Dialect/Quant/Transforms/ConvertSimQuant.cpp -mlir/lib/Dialect/Quant/Transforms/PassDetail.h -mlir/lib/Dialect/Quant/Utils/FakeQuantSupport.cpp -mlir/lib/Dialect/Quant/Utils/QuantizeUtils.cpp -mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp -mlir/lib/Dialect/SCF/SCF.cpp -mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp -mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp -mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp -mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp -mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp -mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp -mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp -mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp -mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp -mlir/lib/Dialect/SCF/Transforms/PassDetail.h -mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp -mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp -mlir/lib/Dialect/SCF/Utils/Utils.cpp -mlir/lib/Dialect/Shape/IR/Shape.cpp -mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp -mlir/lib/Dialect/Shape/Transforms/PassDetail.h -mlir/lib/Dialect/Shape/Transforms/RemoveShapeConstraints.cpp -mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp -mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp -mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp -mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp -mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp -mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h -mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp -mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp -mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp -mlir/lib/Dialect/SPIRV/IR/SPIRVAttributes.cpp -mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp -mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp -mlir/lib/Dialect/SPIRV/IR/SPIRVGLSLCanonicalization.cpp -mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp -mlir/lib/Dialect/SPIRV/IR/TargetAndABI.cpp -mlir/lib/Dialect/SPIRV/Linking/ModuleCombiner/ModuleCombiner.cpp -mlir/lib/Dialect/SPIRV/Transforms/DecorateCompositeTypeLayoutPass.cpp -mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp -mlir/lib/Dialect/SPIRV/Transforms/PassDetail.h -mlir/lib/Dialect/SPIRV/Transforms/RewriteInsertsPass.cpp -mlir/lib/Dialect/SPIRV/Transforms/UnifyAliasedResourcePass.cpp -mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp -mlir/lib/Dialect/SPIRV/Utils/LayoutUtils.cpp -mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp -mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp -mlir/lib/Dialect/Tensor/IR/TensorOps.cpp -mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp -mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp -mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp -mlir/lib/Dialect/Tensor/Transforms/PassDetail.h -mlir/lib/Dialect/Tensor/Utils/Utils.cpp -mlir/lib/Dialect/Tosa/IR/TosaOps.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp -mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp -mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp -mlir/lib/Dialect/Utils/IndexingUtils.cpp -mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp -mlir/lib/Dialect/Utils/StaticValueUtils.cpp -mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp -mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp -mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp -mlir/lib/Dialect/Vector/Transforms/PassDetail.h -mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp -mlir/lib/Dialect/Vector/Transforms/VectorInsertExtractStridedSliceRewritePatterns.cpp -mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp -mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp -mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp -mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp -mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp -mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp -mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp -mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp -mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp -mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp -mlir/lib/ExecutionEngine/AsyncRuntime.cpp -mlir/lib/ExecutionEngine/CRunnerUtils.cpp -mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp -mlir/lib/ExecutionEngine/ExecutionEngine.cpp -mlir/lib/ExecutionEngine/JitRunner.cpp -mlir/lib/ExecutionEngine/OptUtils.cpp -mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp -mlir/lib/ExecutionEngine/RunnerUtils.cpp -mlir/lib/ExecutionEngine/SparseTensorUtils.cpp -mlir/lib/Interfaces/CallInterfaces.cpp -mlir/lib/Interfaces/CastInterfaces.cpp -mlir/lib/Interfaces/ControlFlowInterfaces.cpp -mlir/lib/Interfaces/CopyOpInterface.cpp -mlir/lib/Interfaces/DataLayoutInterfaces.cpp -mlir/lib/Interfaces/DerivedAttributeOpInterface.cpp -mlir/lib/Interfaces/InferTypeOpInterface.cpp -mlir/lib/Interfaces/LoopLikeInterface.cpp -mlir/lib/Interfaces/SideEffectInterfaces.cpp -mlir/lib/Interfaces/TilingInterface.cpp -mlir/lib/Interfaces/VectorInterfaces.cpp -mlir/lib/Interfaces/ViewLikeInterface.cpp -mlir/lib/IR/AffineExprDetail.h -mlir/lib/IR/AffineMap.cpp -mlir/lib/IR/AffineMapDetail.h -mlir/lib/IR/AttributeDetail.h -mlir/lib/IR/Attributes.cpp -mlir/lib/IR/Builders.cpp -mlir/lib/IR/BuiltinAttributeInterfaces.cpp -mlir/lib/IR/BuiltinAttributes.cpp -mlir/lib/IR/BuiltinDialect.cpp -mlir/lib/IR/BuiltinTypeInterfaces.cpp -mlir/lib/IR/BuiltinTypes.cpp -mlir/lib/IR/Dialect.cpp -mlir/lib/IR/Dominance.cpp -mlir/lib/IR/FunctionImplementation.cpp -mlir/lib/IR/IntegerSet.cpp -mlir/lib/IR/IntegerSetDetail.h -mlir/lib/IR/Location.cpp -mlir/lib/IR/MLIRContext.cpp -mlir/lib/IR/Operation.cpp -mlir/lib/IR/PatternMatch.cpp -mlir/lib/IR/Region.cpp -mlir/lib/IR/RegionKindInterface.cpp -mlir/lib/IR/SubElementInterfaces.cpp -mlir/lib/IR/SymbolTable.cpp -mlir/lib/IR/TensorEncoding.cpp -mlir/lib/IR/TypeDetail.h -mlir/lib/IR/TypeRange.cpp -mlir/lib/IR/Types.cpp -mlir/lib/IR/TypeUtilities.cpp -mlir/lib/IR/Verifier.cpp -mlir/lib/IR/Visitors.cpp -mlir/lib/Parser/AffineParser.cpp -mlir/lib/Parser/Lexer.cpp -mlir/lib/Parser/Lexer.h -mlir/lib/Parser/LocationParser.cpp -mlir/lib/Parser/Parser.cpp -mlir/lib/Parser/Parser.h -mlir/lib/Parser/ParserState.h -mlir/lib/Parser/Token.cpp -mlir/lib/Parser/Token.h -mlir/lib/Parser/TypeParser.cpp -mlir/lib/Pass/Pass.cpp -mlir/lib/Pass/PassCrashRecovery.cpp -mlir/lib/Pass/PassDetail.h -mlir/lib/Pass/PassManagerOptions.cpp -mlir/lib/Pass/PassStatistics.cpp -mlir/lib/Pass/PassTiming.cpp -mlir/lib/Reducer/OptReductionPass.cpp -mlir/lib/Reducer/ReductionNode.cpp -mlir/lib/Reducer/ReductionTreePass.cpp -mlir/lib/Reducer/Tester.cpp -mlir/lib/Rewrite/ByteCode.cpp -mlir/lib/Rewrite/ByteCode.h -mlir/lib/Rewrite/FrozenRewritePatternSet.cpp -mlir/lib/Support/DebugCounter.cpp -mlir/lib/Support/FileUtilities.cpp -mlir/lib/Support/IndentedOstream.cpp -mlir/lib/Support/InterfaceSupport.cpp -mlir/lib/Support/MlirOptMain.cpp -mlir/lib/Support/StorageUniquer.cpp -mlir/lib/Support/Timing.cpp -mlir/lib/Support/ToolUtilities.cpp -mlir/lib/TableGen/Argument.cpp -mlir/lib/TableGen/Attribute.cpp -mlir/lib/TableGen/AttrOrTypeDef.cpp -mlir/lib/TableGen/Builder.cpp -mlir/lib/TableGen/Class.cpp -mlir/lib/TableGen/Constraint.cpp -mlir/lib/TableGen/Dialect.cpp -mlir/lib/TableGen/Format.cpp -mlir/lib/TableGen/Interfaces.cpp -mlir/lib/TableGen/Operator.cpp -mlir/lib/TableGen/Pass.cpp -mlir/lib/TableGen/Pattern.cpp -mlir/lib/TableGen/Predicate.cpp -mlir/lib/TableGen/Region.cpp -mlir/lib/TableGen/SideEffects.cpp -mlir/lib/TableGen/Successor.cpp -mlir/lib/TableGen/Trait.cpp -mlir/lib/TableGen/Type.cpp -mlir/lib/Target/Cpp/TranslateRegistration.cpp -mlir/lib/Target/Cpp/TranslateToCpp.cpp -mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp -mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp -mlir/lib/Target/LLVMIR/DebugTranslation.cpp -mlir/lib/Target/LLVMIR/DebugTranslation.h -mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp -mlir/lib/Target/LLVMIR/TypeToLLVM.cpp -mlir/lib/Target/LLVMIR/Dialect/AMX/AMXToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/ArmNeon/ArmNeonToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/ArmSVE/ArmSVEToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.cpp -mlir/lib/Target/LLVMIR/Dialect/X86Vector/X86VectorToLLVMIRTranslation.cpp -mlir/lib/Target/SPIRV/SPIRVBinaryUtils.cpp -mlir/lib/Target/SPIRV/TranslateRegistration.cpp -mlir/lib/Target/SPIRV/Deserialization/Deserialization.cpp -mlir/lib/Target/SPIRV/Deserialization/DeserializeOps.cpp -mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp -mlir/lib/Target/SPIRV/Serialization/Serialization.cpp -mlir/lib/Target/SPIRV/Serialization/SerializeOps.cpp -mlir/lib/Target/SPIRV/Serialization/Serializer.cpp -mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp -mlir/lib/Tools/mlir-lsp-server/LSPServer.h -mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp -mlir/lib/Tools/mlir-lsp-server/MLIRServer.h -mlir/lib/Tools/mlir-lsp-server/lsp/Logging.cpp -mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.cpp -mlir/lib/Tools/mlir-lsp-server/lsp/Transport.cpp -mlir/lib/Tools/mlir-lsp-server/lsp/Transport.h -mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp -mlir/lib/Tools/PDLL/AST/Context.cpp -mlir/lib/Tools/PDLL/AST/Diagnostic.cpp -mlir/lib/Tools/PDLL/AST/NodePrinter.cpp -mlir/lib/Tools/PDLL/AST/TypeDetail.h -mlir/lib/Tools/PDLL/AST/Types.cpp -mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp -mlir/lib/Tools/PDLL/ODS/Context.cpp -mlir/lib/Tools/PDLL/ODS/Dialect.cpp -mlir/lib/Tools/PDLL/ODS/Operation.cpp -mlir/lib/Tools/PDLL/Parser/Parser.cpp -mlir/lib/Transforms/Canonicalizer.cpp -mlir/lib/Transforms/ControlFlowSink.cpp -mlir/lib/Transforms/CSE.cpp -mlir/lib/Transforms/Inliner.cpp -mlir/lib/Transforms/LocationSnapshot.cpp -mlir/lib/Transforms/LoopInvariantCodeMotion.cpp -mlir/lib/Transforms/PassDetail.h -mlir/lib/Transforms/SCCP.cpp -mlir/lib/Transforms/StripDebugInfo.cpp -mlir/lib/Transforms/SymbolDCE.cpp -mlir/lib/Transforms/SymbolPrivatize.cpp -mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp -mlir/lib/Transforms/Utils/DialectConversion.cpp -mlir/lib/Transforms/Utils/FoldUtils.cpp -mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp -mlir/lib/Transforms/Utils/InliningUtils.cpp -mlir/lib/Transforms/Utils/RegionUtils.cpp -mlir/lib/Translation/Translation.cpp -mlir/tools/mlir-cpu-runner/mlir-cpu-runner.cpp -mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp -mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp -mlir/tools/mlir-opt/mlir-opt.cpp -mlir/tools/mlir-pdll/mlir-pdll.cpp -mlir/tools/mlir-reduce/mlir-reduce.cpp -mlir/tools/mlir-shlib/mlir-shlib.cpp -mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp -mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp -mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp -mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.h -mlir/tools/mlir-tblgen/CodeGenHelpers.cpp -mlir/tools/mlir-tblgen/DialectGen.cpp -mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp -mlir/tools/mlir-tblgen/DocGenUtilities.h -mlir/tools/mlir-tblgen/EnumsGen.cpp -mlir/tools/mlir-tblgen/FormatGen.cpp -mlir/tools/mlir-tblgen/FormatGen.h -mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp -mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp -mlir/tools/mlir-tblgen/mlir-tblgen.cpp -mlir/tools/mlir-tblgen/OpClass.cpp -mlir/tools/mlir-tblgen/OpClass.h -mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp -mlir/tools/mlir-tblgen/OpDocGen.cpp -mlir/tools/mlir-tblgen/OpFormatGen.h -mlir/tools/mlir-tblgen/OpGenHelpers.cpp -mlir/tools/mlir-tblgen/OpGenHelpers.h -mlir/tools/mlir-tblgen/OpInterfacesGen.cpp -mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp -mlir/tools/mlir-tblgen/PassCAPIGen.cpp -mlir/tools/mlir-tblgen/PassDocGen.cpp -mlir/tools/mlir-tblgen/PassGen.cpp -mlir/tools/mlir-tblgen/RewriterGen.cpp -mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp -mlir/tools/mlir-tblgen/StructsGen.cpp -mlir/tools/mlir-translate/mlir-translate.cpp -mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp -mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp -mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp -mlir/tools/mlir-vulkan-runner/VulkanRuntime.h -mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp -mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp -mlir/unittests/Analysis/Presburger/MatrixTest.cpp -mlir/unittests/Analysis/Presburger/PresburgerSetTest.cpp -mlir/unittests/Analysis/Presburger/PresburgerSpaceTest.cpp -mlir/unittests/Analysis/Presburger/PWMAFunctionTest.cpp -mlir/unittests/Analysis/Presburger/SimplexTest.cpp -mlir/unittests/Analysis/Presburger/Utils.h -mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp -mlir/unittests/Dialect/BroadcastShapeTest.cpp -mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp -mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.h -mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParserTest.cpp -mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp -mlir/unittests/Dialect/SparseTensor/MergerTest.cpp -mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp -mlir/unittests/Dialect/SPIRV/SerializationTest.cpp -mlir/unittests/Dialect/Utils/StructuredOpsUtilsTest.cpp -mlir/unittests/ExecutionEngine/Invoke.cpp -mlir/unittests/Interfaces/ControlFlowInterfacesTest.cpp -mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp -mlir/unittests/Interfaces/InferTypeOpInterfaceTest.cpp -mlir/unittests/IR/AttributeTest.cpp -mlir/unittests/IR/DialectTest.cpp -mlir/unittests/IR/InterfaceAttachmentTest.cpp -mlir/unittests/IR/OperationSupportTest.cpp -mlir/unittests/IR/PatternMatchTest.cpp -mlir/unittests/IR/ShapedTypeTest.cpp -mlir/unittests/IR/SubElementInterfaceTest.cpp -mlir/unittests/Pass/AnalysisManagerTest.cpp -mlir/unittests/Pass/PassManagerTest.cpp -mlir/unittests/Pass/PassPipelineParserTest.cpp -mlir/unittests/Rewrite/PatternBenefit.cpp -mlir/unittests/Support/DebugCounterTest.cpp -mlir/unittests/Support/IndentedOstreamTest.cpp -mlir/unittests/Support/MathExtrasTest.cpp -mlir/unittests/Support/StorageUniquerTest.cpp -mlir/unittests/TableGen/EnumsGenTest.cpp -mlir/unittests/TableGen/FormatTest.cpp -mlir/unittests/TableGen/PassGenTest.cpp -mlir/unittests/Transforms/Canonicalizer.cpp -mlir/unittests/Transforms/DialectConversion.cpp -openmp/libompd/src/Debug.h -openmp/libompd/src/omp-debug.cpp -openmp/libompd/src/omp-debug.h -openmp/libompd/src/omp-icv.cpp -openmp/libompd/src/omp-state.cpp -openmp/libompd/src/ompd-private.h -openmp/libompd/src/ompd-types.h -openmp/libompd/src/TargetValue.cpp -openmp/libompd/src/TargetValue.h -openmp/libomptarget/DeviceRTL/include/Configuration.h -openmp/libomptarget/DeviceRTL/include/Debug.h -openmp/libomptarget/DeviceRTL/include/Interface.h -openmp/libomptarget/DeviceRTL/include/Mapping.h -openmp/libomptarget/DeviceRTL/include/State.h -openmp/libomptarget/DeviceRTL/include/Synchronization.h -openmp/libomptarget/DeviceRTL/include/Types.h -openmp/libomptarget/DeviceRTL/include/Utils.h -openmp/libomptarget/DeviceRTL/src/Configuration.cpp -openmp/libomptarget/DeviceRTL/src/Kernel.cpp -openmp/libomptarget/DeviceRTL/src/Misc.cpp -openmp/libomptarget/DeviceRTL/src/Parallelism.cpp -openmp/libomptarget/DeviceRTL/src/Reduction.cpp -openmp/libomptarget/DeviceRTL/src/State.cpp -openmp/libomptarget/DeviceRTL/src/Synchronization.cpp -openmp/libomptarget/DeviceRTL/src/Tasking.cpp -openmp/libomptarget/DeviceRTL/src/Utils.cpp -openmp/libomptarget/include/Debug.h -openmp/libomptarget/include/device.h -openmp/libomptarget/include/DeviceEnvironment.h -openmp/libomptarget/include/interop.h -openmp/libomptarget/include/omptarget.h -openmp/libomptarget/include/omptargetplugin.h -openmp/libomptarget/include/rtl.h -openmp/libomptarget/include/SourceInfo.h -openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp -openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h -openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.cpp -openmp/libomptarget/plugins/amdgpu/impl/get_elf_mach_gfx_name.h -openmp/libomptarget/plugins/amdgpu/impl/hsa_api.h -openmp/libomptarget/plugins/amdgpu/impl/impl.cpp -openmp/libomptarget/plugins/amdgpu/impl/impl_runtime.h -openmp/libomptarget/plugins/amdgpu/impl/internal.h -openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp -openmp/libomptarget/plugins/amdgpu/impl/msgpack.cpp -openmp/libomptarget/plugins/amdgpu/impl/msgpack.h -openmp/libomptarget/plugins/amdgpu/impl/rt.h -openmp/libomptarget/plugins/amdgpu/src/print_tracing.h -openmp/libomptarget/plugins/common/elf_common/elf_common.cpp -openmp/libomptarget/plugins/common/elf_common/elf_common.h -openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h -openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp -openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h -openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp -openmp/libomptarget/plugins/remote/include/Utils.h -openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp -openmp/libomptarget/plugins/remote/server/Server.cpp -openmp/libomptarget/plugins/remote/server/Server.h -openmp/libomptarget/plugins/remote/src/Client.cpp -openmp/libomptarget/plugins/remote/src/Client.h -openmp/libomptarget/plugins/ve/src/rtl.cpp -openmp/libomptarget/src/api.cpp -openmp/libomptarget/src/interface.cpp -openmp/libomptarget/src/interop.cpp -openmp/libomptarget/src/omptarget.cpp -openmp/libomptarget/src/private.h -openmp/libomptarget/src/rtl.cpp -openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp -openmp/runtime/doc/doxygen/libomp_interface.h -openmp/runtime/src/extractExternal.cpp -openmp/runtime/src/kmp.h -openmp/runtime/src/kmp_affinity.h -openmp/runtime/src/kmp_alloc.cpp -openmp/runtime/src/kmp_atomic.cpp -openmp/runtime/src/kmp_atomic.h -openmp/runtime/src/kmp_barrier.h -openmp/runtime/src/kmp_cancel.cpp -openmp/runtime/src/kmp_debug.cpp -openmp/runtime/src/kmp_debug.h -openmp/runtime/src/kmp_debugger.cpp -openmp/runtime/src/kmp_debugger.h -openmp/runtime/src/kmp_dispatch.cpp -openmp/runtime/src/kmp_dispatch.h -openmp/runtime/src/kmp_dispatch_hier.h -openmp/runtime/src/kmp_environment.cpp -openmp/runtime/src/kmp_environment.h -openmp/runtime/src/kmp_error.cpp -openmp/runtime/src/kmp_error.h -openmp/runtime/src/kmp_ftn_cdecl.cpp -openmp/runtime/src/kmp_ftn_extra.cpp -openmp/runtime/src/kmp_ftn_os.h -openmp/runtime/src/kmp_ftn_stdcall.cpp -openmp/runtime/src/kmp_global.cpp -openmp/runtime/src/kmp_i18n.cpp -openmp/runtime/src/kmp_i18n.h -openmp/runtime/src/kmp_import.cpp -openmp/runtime/src/kmp_io.cpp -openmp/runtime/src/kmp_io.h -openmp/runtime/src/kmp_itt.cpp -openmp/runtime/src/kmp_itt.h -openmp/runtime/src/kmp_lock.cpp -openmp/runtime/src/kmp_omp.h -openmp/runtime/src/kmp_platform.h -openmp/runtime/src/kmp_safe_c_api.h -openmp/runtime/src/kmp_sched.cpp -openmp/runtime/src/kmp_settings.cpp -openmp/runtime/src/kmp_settings.h -openmp/runtime/src/kmp_stats.h -openmp/runtime/src/kmp_stats_timing.cpp -openmp/runtime/src/kmp_stats_timing.h -openmp/runtime/src/kmp_str.cpp -openmp/runtime/src/kmp_str.h -openmp/runtime/src/kmp_stub.cpp -openmp/runtime/src/kmp_stub.h -openmp/runtime/src/kmp_taskdeps.cpp -openmp/runtime/src/kmp_taskdeps.h -openmp/runtime/src/kmp_threadprivate.cpp -openmp/runtime/src/kmp_utility.cpp -openmp/runtime/src/kmp_version.cpp -openmp/runtime/src/kmp_version.h -openmp/runtime/src/kmp_wait_release.cpp -openmp/runtime/src/kmp_wait_release.h -openmp/runtime/src/kmp_wrapper_getpid.h -openmp/runtime/src/kmp_wrapper_malloc.h -openmp/runtime/src/ompd-specific.cpp -openmp/runtime/src/ompd-specific.h -openmp/runtime/src/ompt-event-specific.h -openmp/runtime/src/ompt-general.cpp -openmp/runtime/src/ompt-internal.h -openmp/runtime/src/ompt-specific.cpp -openmp/runtime/src/ompt-specific.h -openmp/runtime/src/z_Linux_util.cpp -openmp/runtime/src/z_Windows_NT-586_util.cpp -openmp/runtime/src/z_Windows_NT_util.cpp -openmp/runtime/src/thirdparty/ittnotify/ittnotify.h -openmp/runtime/src/thirdparty/ittnotify/ittnotify_config.h -openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.cpp -openmp/runtime/src/thirdparty/ittnotify/ittnotify_static.h -openmp/runtime/src/thirdparty/ittnotify/ittnotify_types.h -openmp/runtime/src/thirdparty/ittnotify/legacy/ittnotify.h -openmp/tools/archer/ompt-tsan.cpp -openmp/tools/archer/tests/ompt/ompt-signal.h -openmp/tools/multiplex/ompt-multiplex.h -openmp/tools/multiplex/tests/ompt-signal.h -openmp/tools/multiplex/tests/custom_data_storage/second-tool.h -openmp/tools/multiplex/tests/print/first-tool.h -openmp/tools/multiplex/tests/print/second-tool.h -polly/include/polly/Canonicalization.h -polly/include/polly/CodePreparation.h -polly/include/polly/DeadCodeElimination.h -polly/include/polly/DeLICM.h -polly/include/polly/DependenceInfo.h -polly/include/polly/FlattenAlgo.h -polly/include/polly/FlattenSchedule.h -polly/include/polly/ForwardOpTree.h -polly/include/polly/JSONExporter.h -polly/include/polly/LinkAllPasses.h -polly/include/polly/ManualOptimizer.h -polly/include/polly/MatmulOptimizer.h -polly/include/polly/Options.h -polly/include/polly/PolyhedralInfo.h -polly/include/polly/PruneUnprofitable.h -polly/include/polly/RegisterPasses.h -polly/include/polly/ScheduleOptimizer.h -polly/include/polly/ScheduleTreeTransform.h -polly/include/polly/ScopBuilder.h -polly/include/polly/ScopDetection.h -polly/include/polly/ScopDetectionDiagnostic.h -polly/include/polly/ScopInfo.h -polly/include/polly/ScopPass.h -polly/include/polly/Simplify.h -polly/include/polly/ZoneAlgo.h -polly/include/polly/CodeGen/BlockGenerators.h -polly/include/polly/CodeGen/CodegenCleanup.h -polly/include/polly/CodeGen/CodeGeneration.h -polly/include/polly/CodeGen/IRBuilder.h -polly/include/polly/CodeGen/IslAst.h -polly/include/polly/CodeGen/IslExprBuilder.h -polly/include/polly/CodeGen/IslNodeBuilder.h -polly/include/polly/CodeGen/LoopGenerators.h -polly/include/polly/CodeGen/LoopGeneratorsGOMP.h -polly/include/polly/CodeGen/LoopGeneratorsKMP.h -polly/include/polly/CodeGen/PerfMonitor.h -polly/include/polly/CodeGen/PPCGCodeGeneration.h -polly/include/polly/CodeGen/RuntimeDebugBuilder.h -polly/include/polly/CodeGen/Utils.h -polly/include/polly/Support/DumpFunctionPass.h -polly/include/polly/Support/DumpModulePass.h -polly/include/polly/Support/GICHelper.h -polly/include/polly/Support/ISLOperators.h -polly/include/polly/Support/ISLOStream.h -polly/include/polly/Support/ISLTools.h -polly/include/polly/Support/LinkGPURuntime.h -polly/include/polly/Support/SCEVAffinator.h -polly/include/polly/Support/SCEVValidator.h -polly/include/polly/Support/ScopHelper.h -polly/include/polly/Support/ScopLocation.h -polly/include/polly/Support/VirtualInstruction.h -polly/lib/Analysis/DependenceInfo.cpp -polly/lib/Analysis/PolyhedralInfo.cpp -polly/lib/Analysis/PruneUnprofitable.cpp -polly/lib/Analysis/ScopBuilder.cpp -polly/lib/Analysis/ScopDetection.cpp -polly/lib/Analysis/ScopDetectionDiagnostic.cpp -polly/lib/Analysis/ScopGraphPrinter.cpp -polly/lib/Analysis/ScopInfo.cpp -polly/lib/Analysis/ScopPass.cpp -polly/lib/CodeGen/BlockGenerators.cpp -polly/lib/CodeGen/CodegenCleanup.cpp -polly/lib/CodeGen/CodeGeneration.cpp -polly/lib/CodeGen/IRBuilder.cpp -polly/lib/CodeGen/IslAst.cpp -polly/lib/CodeGen/IslExprBuilder.cpp -polly/lib/CodeGen/IslNodeBuilder.cpp -polly/lib/CodeGen/LoopGenerators.cpp -polly/lib/CodeGen/LoopGeneratorsGOMP.cpp -polly/lib/CodeGen/LoopGeneratorsKMP.cpp -polly/lib/CodeGen/ManagedMemoryRewrite.cpp -polly/lib/CodeGen/PerfMonitor.cpp -polly/lib/CodeGen/PPCGCodeGeneration.cpp -polly/lib/CodeGen/RuntimeDebugBuilder.cpp -polly/lib/CodeGen/Utils.cpp -polly/lib/Exchange/JSONExporter.cpp -polly/lib/External/isl/isl_local_private.h -polly/lib/External/isl/imath/iprime.h -polly/lib/External/isl/include/isl/id_type.h -polly/lib/External/isl/include/isl/isl-noexceptions.h -polly/lib/External/isl/include/isl/map_type.h -polly/lib/External/isl/include/isl/printer_type.h -polly/lib/External/isl/include/isl/set_type.h -polly/lib/External/isl/include/isl/union_map_type.h -polly/lib/External/isl/include/isl/union_set_type.h -polly/lib/External/isl/include/isl/val_type.h -polly/lib/External/isl/include/isl/version.h -polly/lib/External/isl/interface/extract_interface.h -polly/lib/Plugin/Polly.cpp -polly/lib/Support/DumpFunctionPass.cpp -polly/lib/Support/DumpModulePass.cpp -polly/lib/Support/GICHelper.cpp -polly/lib/Support/ISLTools.cpp -polly/lib/Support/Mainpage.h -polly/lib/Support/RegisterPasses.cpp -polly/lib/Support/SCEVAffinator.cpp -polly/lib/Support/SCEVValidator.cpp -polly/lib/Support/ScopHelper.cpp -polly/lib/Support/ScopLocation.cpp -polly/lib/Support/VirtualInstruction.cpp -polly/lib/Transform/Canonicalization.cpp -polly/lib/Transform/CodePreparation.cpp -polly/lib/Transform/DeadCodeElimination.cpp -polly/lib/Transform/DeLICM.cpp -polly/lib/Transform/FlattenAlgo.cpp -polly/lib/Transform/FlattenSchedule.cpp -polly/lib/Transform/ForwardOpTree.cpp -polly/lib/Transform/ManualOptimizer.cpp -polly/lib/Transform/MatmulOptimizer.cpp -polly/lib/Transform/MaximalStaticExpansion.cpp -polly/lib/Transform/ScheduleOptimizer.cpp -polly/lib/Transform/ScheduleTreeTransform.cpp -polly/lib/Transform/ScopInliner.cpp -polly/lib/Transform/Simplify.cpp -polly/lib/Transform/ZoneAlgo.cpp -polly/tools/GPURuntime/GPUJIT.h -polly/unittests/DeLICM/DeLICMTest.cpp -polly/unittests/Flatten/FlattenTest.cpp -polly/unittests/Isl/IslTest.cpp -polly/unittests/ScheduleOptimizer/ScheduleTreeTransformTest.cpp -polly/unittests/ScopPassManager/PassManagerTest.cpp -polly/unittests/Support/ISLTools.cpp -pstl/include/pstl/internal/algorithm_fwd.h -pstl/include/pstl/internal/execution_defs.h -pstl/include/pstl/internal/execution_impl.h -pstl/include/pstl/internal/glue_algorithm_defs.h -pstl/include/pstl/internal/glue_algorithm_impl.h -pstl/include/pstl/internal/glue_execution_defs.h -pstl/include/pstl/internal/glue_memory_defs.h -pstl/include/pstl/internal/glue_memory_impl.h -pstl/include/pstl/internal/glue_numeric_defs.h -pstl/include/pstl/internal/glue_numeric_impl.h -pstl/include/pstl/internal/numeric_fwd.h -pstl/include/pstl/internal/parallel_backend.h -pstl/include/pstl/internal/parallel_backend_omp.h -pstl/include/pstl/internal/parallel_backend_serial.h -pstl/include/pstl/internal/parallel_backend_utils.h -pstl/include/pstl/internal/parallel_impl.h -pstl/include/pstl/internal/omp/parallel_for.h -pstl/include/pstl/internal/omp/parallel_for_each.h -pstl/include/pstl/internal/omp/parallel_invoke.h -pstl/include/pstl/internal/omp/parallel_reduce.h -pstl/include/pstl/internal/omp/parallel_scan.h -pstl/include/pstl/internal/omp/parallel_stable_partial_sort.h -pstl/include/pstl/internal/omp/parallel_transform_scan.h -pstl/include/pstl/internal/omp/util.h -third-party/benchmark/cmake/thread_safety_attributes.cpp -third-party/benchmark/src/arraysize.h -third-party/benchmark/src/benchmark_api_internal.h -third-party/benchmark/src/benchmark_register.h -third-party/benchmark/src/benchmark_runner.h -third-party/benchmark/src/check.h -third-party/benchmark/src/colorprint.h -third-party/benchmark/src/commandlineflags.h -third-party/benchmark/src/complexity.h -third-party/benchmark/src/counter.h -third-party/benchmark/src/cycleclock.h -third-party/benchmark/src/internal_macros.h -third-party/benchmark/src/log.h -third-party/benchmark/src/mutex.h -third-party/benchmark/src/perf_counters.h -third-party/benchmark/src/re.h -third-party/benchmark/src/sleep.h -third-party/benchmark/src/statistics.h -third-party/benchmark/src/string_util.h -third-party/benchmark/src/thread_manager.h -third-party/benchmark/src/thread_timer.h -third-party/benchmark/src/timers.h -utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h -utils/bazel/llvm-project-overlay/llvm/include/llvm/Config/config.h diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 54e484d41fb1c3..c77140842d7a6e 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -6750,7 +6750,8 @@ AST_POLYMORPHIC_MATCHER(isTemplateInstantiation, /// matches 'A(int) {...};' and 'A(unsigned) {...}'. AST_MATCHER_FUNCTION(internal::Matcher, isInstantiated) { auto IsInstantiation = decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), - functionDecl(isTemplateInstantiation()))); + functionDecl(isTemplateInstantiation()), + varDecl(isTemplateInstantiation()))); return decl(anyOf(IsInstantiation, hasAncestor(IsInstantiation))); } @@ -6769,9 +6770,9 @@ AST_MATCHER_FUNCTION(internal::Matcher, isInstantiated) { /// will NOT match j += 42; as it's shared between the template definition and /// instantiation. AST_MATCHER_FUNCTION(internal::Matcher, isInTemplateInstantiation) { - return stmt( - hasAncestor(decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), - functionDecl(isTemplateInstantiation()))))); + return stmt(hasAncestor(decl(anyOf(cxxRecordDecl(isTemplateInstantiation()), + functionDecl(isTemplateInstantiation()), + varDecl(isTemplateInstantiation()))))); } /// Matches explicit template specializations of function, class, or diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 7a130c434e73ce..fbbfc4acdf391e 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3702,20 +3702,32 @@ user-declared functions. For example: .. code-block:: c++ + #include + #include + + using namespace std::literals; + // Returns m[key] if key is present, or default_value if not. template const U &get_or_default(const std::map &m [[clang::lifetimebound]], const T &key, /* note, not lifetimebound */ - const U &default_value [[clang::lifetimebound]]); + const U &default_value [[clang::lifetimebound]]) { + if (auto iter = m.find(key); iter != m.end()) return iter->second; + else return default_value; + } - std::map m; - // warning: temporary "bar"s that might be bound to local reference 'val' - // will be destroyed at the end of the full-expression - const std::string &val = get_or_default(m, "foo"s, "bar"s); + int main() { + std::map m; + // warning: temporary bound to local reference 'val1' will be destroyed + // at the end of the full-expression + const std::string &val1 = get_or_default(m, "foo"s, "bar"s); - // No warning in this case. - std::string def_val = "bar"s; - const std::string &val = get_or_default(m, "foo"s, def_val); + // No warning in this case. + std::string def_val = "bar"s; + const std::string &val2 = get_or_default(m, "foo"s, def_val); + + return 0; + } The attribute can be applied to the implicit ``this`` parameter of a member function by writing the attribute after the function type: diff --git a/clang/include/clang/Basic/BuiltinsBase.td b/clang/include/clang/Basic/BuiltinsBase.td index 58dee22fc0a450..cff182f3f282cb 100644 --- a/clang/include/clang/Basic/BuiltinsBase.td +++ b/clang/include/clang/Basic/BuiltinsBase.td @@ -60,6 +60,10 @@ def ConstIgnoringExceptions : Attribute<"g">; // This function requires a specific header or an explicit declaration. def RequireDeclaration : Attribute<"h">; +// FIXME: Why is this not simply the min_vector_width attribute? +// Vector has to be at least N bits wide. +class RequiredVectorWidth : IndexedAttribute<"V", N>; + class PrintfFormat : IndexedAttribute<"p", I>; class VPrintfFormat : IndexedAttribute<"P", I>; class ScanfFormat : IndexedAttribute<"s", I>; diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 4486eb73a11fa6..c93ea27f164e34 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -26,17 +26,6 @@ # define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif -// Undefined Values -// -TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "ncV:128:", "") -TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "ncV:256:", "") -TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "") - -// FLAGS -// -TARGET_BUILTIN(__builtin_ia32_readeflags_u32, "Ui", "n", "") -TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "") - // MMX // // All MMX instructions will be generated via builtins. Any MMX vector @@ -46,113 +35,8 @@ TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "") // argument and our prior approach of using a #define to the current built-in // doesn't work in the presence of re-declaration of _mm_prefetch for windows. TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_emms, "v", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "sV4sIi", "ncV:64:", "sse") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4ssIi", "ncV:64:", "sse") // SSE intrinsics. -TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "ncV:128:", "sse") - -TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "ncV:128:", "sse2") - -TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "sse") - -TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "OiV2OiIi", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "ncV:128:", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "ncV:128:", "sse2") - -TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "ncV:128:", "sse3") -TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh",XMMINTRIN_H, ALL_LANGUAGES, "sse") @@ -316,16 +200,6 @@ TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4OiV4OiV4OiIc", "ncV:256:", "vpclm TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8OiV8OiV8OiIc", "ncV:512:", "avx512f,evex512,vpclmulqdq") // AVX -TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "ncV:256:", "avx") -TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2Oi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4Oi", "ncV:256:", "avx") diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td new file mode 100644 index 00000000000000..cf8d2771310e3c --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -0,0 +1,137 @@ +//===--- BuiltinsX86.td - X86 Builtin function database ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the X86-specific builtin function database. +// +//===----------------------------------------------------------------------===// + +include "clang/Basic/BuiltinsBase.td" + +class X86Builtin : TargetBuiltin { + let Spellings = ["__builtin_ia32_" # NAME]; + let Prototype = prototype; +} + +// Undefined Values +def undef128 : X86Builtin<"_Vector<2, double>()"> { + let Attributes = [Const, NoThrow, RequiredVectorWidth<128>]; +} + +def undef256 : X86Builtin<"_Vector<4, double>()"> { + let Attributes = [Const, NoThrow, RequiredVectorWidth<256>]; +} + +def undef512 : X86Builtin<"_Vector<8, double>()"> { + let Attributes = [Const, NoThrow, RequiredVectorWidth<512>]; +} + +// FLAGS +def readeflags_u32 : X86Builtin<"unsigned int()"> { + let Attributes = [NoThrow]; +} + +def writeeflags_u32 : X86Builtin<"void(unsigned int)"> { + let Attributes = [NoThrow]; +} + +// MMX +// +// All MMX instructions will be generated via builtins. Any MMX vector +// types (<1 x i64>, <2 x i32>, etc.) that aren't used by these builtins will be +// expanded by the back-end. + +def emms : X86Builtin<"void()"> { + let Attributes = [NoThrow]; + let Features = "mmx"; +} + +let Attributes = [NoThrow, Const, RequiredVectorWidth<64>], Features = "sse" in { + def vec_ext_v4hi : X86Builtin<"short(_Vector<4, short>, _Constant int)">; + def vec_set_v4hi : X86Builtin<"_Vector<4, short>(_Vector<4, short>, short, _Constant int)">; +} + +// SSE intrinsics +let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { + foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in { + let Features = "sse" in { + def comi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">; + def ucomi#Cmp : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">; + } + let Features = "sse2" in { + def comisd#Cmp : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; + def ucomisd#Cmp : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; + } + } + + foreach Cmp = ["cmpeq", "cmplt", "cmple", "cmpunord", "cmpneq", "cmpnlt", + "cmpnle", "cmpord", "min", "max"] in { + let Features = "sse" in { + def Cmp#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def Cmp#ss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + } + let Features = "sse2" in { + def Cmp#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + def Cmp#sd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + } + } + + let Features = "sse" in { + def cmpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; + def cmpss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; + } + + let Features = "sse2" in { + def cmppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; + def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; + } + + let Features = "sse2" in { + def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; + def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; + def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">; + def pmulhuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def vec_ext_v2di : X86Builtin<"long long int(_Vector<2, long long int>, _Constant int)">; + def vec_ext_v4si : X86Builtin<"int(_Vector<4, int>, _Constant int)">; + def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">; + def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">; + def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">; + } + + let Features = "sse3" in { + foreach Op = ["addsub", "hadd", "hsub"] in { + def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">; + def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">; + } + } + + let Features = "ssse3" in { + foreach Op = ["phadd", "phsub"] in { + def Op#w128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#sw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def Op#d128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + } + + def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">; + def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; + def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; + def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; + } +} + +// AVX +let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in { + foreach Op = ["addsub", "hadd", "hsub", "max", "min"] in { + def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">; + def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">; + } +} diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index f069f4fc118f27..76ac3367e23a66 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -60,6 +60,10 @@ clang_tablegen(BuiltinsRISCV.inc -gen-clang-builtins SOURCE BuiltinsRISCV.td TARGET ClangBuiltinsRISCV) +clang_tablegen(BuiltinsX86.inc -gen-clang-builtins + SOURCE BuiltinsX86.td + TARGET ClangBuiltinsX86) + # ARM NEON and MVE clang_tablegen(arm_neon.inc -gen-arm-neon-sema SOURCE arm_neon.td diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index d0f41b17c154f3..89ebf5758a5b55 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -125,6 +125,8 @@ namespace clang { LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, #include "clang/Basic/BuiltinsX86.def" +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsX86.inc" FirstX86_64Builtin, LastX86CommonBuiltin = FirstX86_64Builtin - 1, #define BUILTIN(ID, TYPE, ATTRS) BI##ID, diff --git a/clang/include/clang/Index/USRGeneration.h b/clang/include/clang/Index/USRGeneration.h index f89fc5cf49302c..61d267f3545a70 100644 --- a/clang/include/clang/Index/USRGeneration.h +++ b/clang/include/clang/Index/USRGeneration.h @@ -15,6 +15,7 @@ namespace clang { class ASTContext; class Decl; +class LangOptions; class MacroDefinitionRecord; class Module; class SourceLocation; @@ -30,6 +31,8 @@ static inline StringRef getUSRSpacePrefix() { /// Generate a USR for a Decl, including the USR prefix. /// \returns true if the results should be ignored, false otherwise. bool generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf); +bool generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf, + const LangOptions &LangOpts); /// Generate a USR fragment for an Objective-C class. void generateUSRForObjCClass(StringRef Cls, raw_ostream &OS, @@ -75,7 +78,10 @@ bool generateUSRForMacro(StringRef MacroName, SourceLocation Loc, /// Generates a USR for a type. /// /// \return true on error, false on success. -bool generateUSRForType(QualType T, ASTContext &Ctx, SmallVectorImpl &Buf); +bool generateUSRForType(QualType T, ASTContext &Ctx, + SmallVectorImpl &Buf); +bool generateUSRForType(QualType T, ASTContext &Ctx, SmallVectorImpl &Buf, + const LangOptions &LangOpts); /// Generate a USR for a module, including the USR prefix. /// \returns true on error, false on success. diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index b3e46508cf596d..14bc260d0245fb 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -468,6 +468,7 @@ class CXXNameMangler { void mangleLambdaSig(const CXXRecordDecl *Lambda); void mangleModuleNamePrefix(StringRef Name, bool IsPartition = false); void mangleVendorQualifier(StringRef Name); + void mangleVendorType(StringRef Name); private: @@ -2891,6 +2892,10 @@ void CXXNameMangler::mangleVendorQualifier(StringRef name) { Out << 'U' << name.size() << name; } +void CXXNameMangler::mangleVendorType(StringRef name) { + Out << 'u' << name.size() << name; +} + void CXXNameMangler::mangleRefQualifier(RefQualifierKind RefQualifier) { // ::= R # lvalue reference // ::= O # rvalue-reference @@ -3413,8 +3418,7 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { if (T->getKind() == BuiltinType::SveBFloat16 && \ isCompatibleWith(LangOptions::ClangABI::Ver17)) { \ /* Prior to Clang 18.0 we used this incorrect mangled name */ \ - type_name = "__SVBFloat16_t"; \ - Out << "u" << type_name.size() << type_name; \ + mangleVendorType("__SVBFloat16_t"); \ } else { \ type_name = MangledName; \ Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \ @@ -3436,35 +3440,30 @@ void CXXNameMangler::mangleType(const BuiltinType *T) { Out << (type_name == Name ? "u" : "") << type_name.size() << type_name; \ break; #include "clang/Basic/AArch64SVEACLETypes.def" -#define PPC_VECTOR_TYPE(Name, Id, Size) \ - case BuiltinType::Id: \ - type_name = #Name; \ - Out << 'u' << type_name.size() << type_name; \ +#define PPC_VECTOR_TYPE(Name, Id, Size) \ + case BuiltinType::Id: \ + mangleVendorType(#Name); \ break; #include "clang/Basic/PPCTypes.def" // TODO: Check the mangling scheme for RISC-V V. #define RVV_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: \ - type_name = Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(Name); \ break; #include "clang/Basic/RISCVVTypes.def" #define WASM_REF_TYPE(InternalName, MangledName, Id, SingletonId, AS) \ case BuiltinType::Id: \ - type_name = MangledName; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(MangledName); \ break; #include "clang/Basic/WebAssemblyReferenceTypes.def" #define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \ case BuiltinType::Id: \ - type_name = Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(Name); \ break; #include "clang/Basic/AMDGPUTypes.def" #define HLSL_INTANGIBLE_TYPE(Name, Id, SingletonId) \ case BuiltinType::Id: \ - type_name = #Name; \ - Out << 'u' << type_name.size() << type_name; \ + mangleVendorType(#Name); \ break; #include "clang/Basic/HLSLIntangibleTypes.def" } @@ -4035,8 +4034,9 @@ void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) { if (T->getVectorKind() == VectorKind::SveFixedLengthPredicate) VecSizeInBits *= 8; - Out << "9__SVE_VLSI" << 'u' << TypeName.size() << TypeName << "Lj" - << VecSizeInBits << "EE"; + Out << "9__SVE_VLSI"; + mangleVendorType(TypeName); + Out << "Lj" << VecSizeInBits << "EE"; } void CXXNameMangler::mangleAArch64FixedSveVectorType( @@ -4136,8 +4136,9 @@ void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) { } TypeNameOS << "_t"; - Out << "9__RVV_VLSI" << 'u' << TypeNameStr.size() << TypeNameStr << "Lj" - << VecSizeInBits << "EE"; + Out << "9__RVV_VLSI"; + mangleVendorType(TypeNameStr); + Out << "Lj" << VecSizeInBits << "EE"; } void CXXNameMangler::mangleRISCVFixedRVVVectorType( @@ -4236,8 +4237,7 @@ void CXXNameMangler::mangleType(const ConstantMatrixType *T) { // Mangle matrix types as a vendor extended type: // umatrix_typeIE - StringRef VendorQualifier = "matrix_type"; - Out << "u" << VendorQualifier.size() << VendorQualifier; + mangleVendorType("matrix_type"); Out << "I"; auto &ASTCtx = getASTContext(); @@ -4255,8 +4255,7 @@ void CXXNameMangler::mangleType(const ConstantMatrixType *T) { void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) { // Mangle matrix types as a vendor extended type: // umatrix_typeIE - StringRef VendorQualifier = "matrix_type"; - Out << "u" << VendorQualifier.size() << VendorQualifier; + mangleVendorType("matrix_type"); Out << "I"; mangleTemplateArgExpr(T->getRowExpr()); @@ -4302,7 +4301,7 @@ void CXXNameMangler::mangleType(const ObjCObjectType *T) { StringRef name = I->getName(); QualOS << name.size() << name; } - Out << 'U' << QualStr.size() << QualStr; + mangleVendorQualifier(QualStr); } mangleType(T->getBaseType()); @@ -4436,8 +4435,6 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) { // If this is dependent, we need to record that. If not, we simply // mangle it as the underlying type since they are equivalent. if (T->isDependentType()) { - Out << "u"; - StringRef BuiltinName; switch (T->getUTTKind()) { #define TRANSFORM_TYPE_TRAIT_DEF(Enum, Trait) \ @@ -4446,7 +4443,7 @@ void CXXNameMangler::mangleType(const UnaryTransformType *T) { break; #include "clang/Basic/TransformTypeTraits.def" } - Out << BuiltinName.size() << BuiltinName; + mangleVendorType(BuiltinName); } Out << "I"; @@ -5311,9 +5308,8 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, // ::= u * E # vendor extension const TypeTraitExpr *TTE = cast(E); NotPrimaryExpr(); - Out << 'u'; llvm::StringRef Spelling = getTraitSpelling(TTE->getTrait()); - Out << Spelling.size() << Spelling; + mangleVendorType(Spelling); for (TypeSourceInfo *TSI : TTE->getArgs()) { mangleType(TSI->getType()); } diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index fad2f52e89ef14..2c68409b846bc8 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -171,6 +171,12 @@ class MatchDescendantVisitor return VisitorBase::TraverseCXXTypeidExpr(Node); } + bool TraverseCXXDefaultInitExpr(CXXDefaultInitExpr *Node) { + if (!TraverseStmt(Node->getExpr())) + return false; + return VisitorBase::TraverseCXXDefaultInitExpr(Node); + } + bool TraverseStmt(Stmt *Node, DataRecursionQueue *Queue = nullptr) { if (!Node) return true; @@ -1972,14 +1978,18 @@ class DerefSimplePtrArithFixableGadget : public FixableGadget { }; /// Scan the function and return a list of gadgets found with provided kits. -static std::tuple -findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, - bool EmitSuggestions) { +static void findGadgets(const Stmt *S, ASTContext &Ctx, + const UnsafeBufferUsageHandler &Handler, + bool EmitSuggestions, FixableGadgetList &FixableGadgets, + WarningGadgetList &WarningGadgets, + DeclUseTracker &Tracker) { struct GadgetFinderCallback : MatchFinder::MatchCallback { - FixableGadgetList FixableGadgets; - WarningGadgetList WarningGadgets; - DeclUseTracker Tracker; + GadgetFinderCallback(FixableGadgetList &FixableGadgets, + WarningGadgetList &WarningGadgets, + DeclUseTracker &Tracker) + : FixableGadgets(FixableGadgets), WarningGadgets(WarningGadgets), + Tracker(Tracker) {} void run(const MatchFinder::MatchResult &Result) override { // In debug mode, assert that we've found exactly one gadget. @@ -2020,10 +2030,14 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, assert(numFound >= 1 && "Gadgets not found in match result!"); assert(numFound <= 1 && "Conflicting bind tags in gadgets!"); } + + FixableGadgetList &FixableGadgets; + WarningGadgetList &WarningGadgets; + DeclUseTracker &Tracker; }; MatchFinder M; - GadgetFinderCallback CB; + GadgetFinderCallback CB{FixableGadgets, WarningGadgets, Tracker}; // clang-format off M.addMatcher( @@ -2068,9 +2082,7 @@ findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler, // clang-format on } - M.match(*D->getBody(), D->getASTContext()); - return {std::move(CB.FixableGadgets), std::move(CB.WarningGadgets), - std::move(CB.Tracker)}; + M.match(*S, Ctx); } // Compares AST nodes by source locations. @@ -3614,39 +3626,9 @@ class VariableGroupsManagerImpl : public VariableGroupsManager { } }; -void clang::checkUnsafeBufferUsage(const Decl *D, - UnsafeBufferUsageHandler &Handler, - bool EmitSuggestions) { -#ifndef NDEBUG - Handler.clearDebugNotes(); -#endif - - assert(D && D->getBody()); - // We do not want to visit a Lambda expression defined inside a method - // independently. Instead, it should be visited along with the outer method. - // FIXME: do we want to do the same thing for `BlockDecl`s? - if (const auto *fd = dyn_cast(D)) { - if (fd->getParent()->isLambda() && fd->getParent()->isLocalClass()) - return; - } - - // Do not emit fixit suggestions for functions declared in an - // extern "C" block. - if (const auto *FD = dyn_cast(D)) { - for (FunctionDecl *FReDecl : FD->redecls()) { - if (FReDecl->isExternC()) { - EmitSuggestions = false; - break; - } - } - } - - WarningGadgetSets UnsafeOps; - FixableGadgetSets FixablesForAllVars; - - auto [FixableGadgets, WarningGadgets, Tracker] = - findGadgets(D, Handler, EmitSuggestions); - +void applyGadgets(const Decl *D, FixableGadgetList FixableGadgets, + WarningGadgetList WarningGadgets, DeclUseTracker Tracker, + UnsafeBufferUsageHandler &Handler, bool EmitSuggestions) { if (!EmitSuggestions) { // Our job is very easy without suggestions. Just warn about // every problematic operation and consider it done. No need to deal @@ -3690,8 +3672,10 @@ void clang::checkUnsafeBufferUsage(const Decl *D, if (WarningGadgets.empty()) return; - UnsafeOps = groupWarningGadgetsByVar(std::move(WarningGadgets)); - FixablesForAllVars = groupFixablesByVar(std::move(FixableGadgets)); + WarningGadgetSets UnsafeOps = + groupWarningGadgetsByVar(std::move(WarningGadgets)); + FixableGadgetSets FixablesForAllVars = + groupFixablesByVar(std::move(FixableGadgets)); std::map FixItsForVariableGroup; @@ -3912,3 +3896,56 @@ void clang::checkUnsafeBufferUsage(const Decl *D, } } } + +void clang::checkUnsafeBufferUsage(const Decl *D, + UnsafeBufferUsageHandler &Handler, + bool EmitSuggestions) { +#ifndef NDEBUG + Handler.clearDebugNotes(); +#endif + + assert(D); + + SmallVector Stmts; + + if (const auto *FD = dyn_cast(D)) { + // We do not want to visit a Lambda expression defined inside a method + // independently. Instead, it should be visited along with the outer method. + // FIXME: do we want to do the same thing for `BlockDecl`s? + if (const auto *MD = dyn_cast(D)) { + if (MD->getParent()->isLambda() && MD->getParent()->isLocalClass()) + return; + } + + for (FunctionDecl *FReDecl : FD->redecls()) { + if (FReDecl->isExternC()) { + // Do not emit fixit suggestions for functions declared in an + // extern "C" block. + EmitSuggestions = false; + break; + } + } + + Stmts.push_back(FD->getBody()); + + if (const auto *ID = dyn_cast(D)) { + for (const CXXCtorInitializer *CI : ID->inits()) { + Stmts.push_back(CI->getInit()); + } + } + } else if (isa(D) || isa(D)) { + Stmts.push_back(D->getBody()); + } + + assert(!Stmts.empty()); + + FixableGadgetList FixableGadgets; + WarningGadgetList WarningGadgets; + DeclUseTracker Tracker; + for (Stmt *S : Stmts) { + findGadgets(S, D->getASTContext(), Handler, EmitSuggestions, FixableGadgets, + WarningGadgets, Tracker); + } + applyGadgets(D, std::move(FixableGadgets), std::move(WarningGadgets), + std::move(Tracker), Handler, EmitSuggestions); +} diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index a0f94d5d315480..3d8de0294d4ba3 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -765,8 +765,6 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const { .Case("i8mm", HasMatMul) .Case("bf16", HasBFloat16) .Case("sve", FPU & SveMode) - .Case("sve-bf16", FPU & SveMode && HasBFloat16) - .Case("sve-i8mm", FPU & SveMode && HasMatMul) .Case("sve-b16b16", HasSVEB16B16) .Case("f32mm", FPU & SveMode && HasMatmulFP32) .Case("f64mm", FPU & SveMode && HasMatmulFP64) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 700c2f9a5dbd18..82d29ea9fea5c4 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -32,6 +32,14 @@ static constexpr Builtin::Info BuiltinInfoX86[] = { {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, #include "clang/Basic/BuiltinsX86.def" +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ + {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, +#define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANGS, FEATURE) \ + {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::HEADER, LANGS}, +#include "clang/Basic/BuiltinsX86.inc" + #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 41bb8d19d161eb..bfa9b0a2f836bc 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -2590,10 +2590,6 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { if (it != BlockByrefInfos.end()) return it->second; - llvm::StructType *byrefType = - llvm::StructType::create(getLLVMContext(), - "struct.__block_byref_" + D->getNameAsString()); - QualType Ty = D->getType(); CharUnits size; @@ -2658,7 +2654,9 @@ const BlockByrefInfo &CodeGenFunction::getBlockByrefInfo(const VarDecl *D) { } types.push_back(varTy); - byrefType->setBody(types, packed); + llvm::StructType *byrefType = llvm::StructType::create( + getLLVMContext(), types, "struct.__block_byref_" + D->getNameAsString(), + packed); BlockByrefInfo info; info.Type = byrefType; diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp index 06558ce796f2e4..7ba0d615018181 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.cpp +++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp @@ -306,6 +306,16 @@ void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) { continue; llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass; + if (RC == llvm::hlsl::ResourceClass::UAV || + RC == llvm::hlsl::ResourceClass::SRV) + // UAVs and SRVs have already been converted to use LLVM target types, + // we can disable generating of these resource annotations. This will + // enable progress on structured buffers with user defined types this + // resource annotations code does not handle and it crashes. + // This whole function is going to be removed as soon as cbuffers are + // converted to target types (llvm/llvm-project #114126). + return; + bool IsROV = AttrResType->getAttrs().IsROV; llvm::hlsl::ResourceKind RK = HLSLResAttr->getResourceKind(); llvm::hlsl::ElementType ET = calculateElementType(CGM.getContext(), Ty); diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp index 7a07284f8a8aa5..d6f5f2a43cf51b 100644 --- a/clang/lib/CodeGen/CGObjCGNU.cpp +++ b/clang/lib/CodeGen/CGObjCGNU.cpp @@ -1509,8 +1509,8 @@ class CGObjCGNUstep2 : public CGObjCGNUstep { GetSectionBounds(StringRef Section) { if (CGM.getTriple().isOSBinFormatCOFF()) { if (emptyStruct == nullptr) { - emptyStruct = llvm::StructType::create(VMContext, ".objc_section_sentinel"); - emptyStruct->setBody({}, /*isPacked*/true); + emptyStruct = llvm::StructType::create( + VMContext, {}, ".objc_section_sentinel", /*isPacked=*/true); } auto ZeroInit = llvm::Constant::getNullValue(emptyStruct); auto Sym = [&](StringRef Prefix, StringRef SecSuffix) { diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp index 1c16d273a55357..47ea636c756438 100644 --- a/clang/lib/CodeGen/CGObjCMac.cpp +++ b/clang/lib/CodeGen/CGObjCMac.cpp @@ -5835,15 +5835,7 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_protocol_extension * ProtocolExtensionPtrTy = llvm::PointerType::getUnqual(ProtocolExtensionTy); - // Handle recursive construction of Protocol and ProtocolList types - - ProtocolTy = - llvm::StructType::create(VMContext, "struct._objc_protocol"); - - ProtocolListTy = - llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolListTy->setBody(llvm::PointerType::getUnqual(ProtocolListTy), LongTy, - llvm::ArrayType::get(ProtocolTy, 0)); + // Handle construction of Protocol and ProtocolList types // struct _objc_protocol { // struct _objc_protocol_extension *isa; @@ -5852,9 +5844,16 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // struct _objc_method_description_list *instance_methods; // struct _objc_method_description_list *class_methods; // } - ProtocolTy->setBody(ProtocolExtensionPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListTy), - MethodDescriptionListPtrTy, MethodDescriptionListPtrTy); + ProtocolTy = llvm::StructType::create( + {ProtocolExtensionPtrTy, Int8PtrTy, + llvm::PointerType::getUnqual(VMContext), MethodDescriptionListPtrTy, + MethodDescriptionListPtrTy}, + "struct._objc_protocol"); + + ProtocolListTy = + llvm::StructType::create({llvm::PointerType::getUnqual(VMContext), LongTy, + llvm::ArrayType::get(ProtocolTy, 0)}, + "struct._objc_protocol_list"); // struct _objc_protocol_list * ProtocolListPtrTy = llvm::PointerType::getUnqual(ProtocolListTy); @@ -5886,8 +5885,6 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) "struct._objc_class_extension", IntTy, Int8PtrTy, PropertyListPtrTy); ClassExtensionPtrTy = llvm::PointerType::getUnqual(ClassExtensionTy); - ClassTy = llvm::StructType::create(VMContext, "struct._objc_class"); - // struct _objc_class { // Class isa; // Class super_class; @@ -5902,10 +5899,12 @@ ObjCTypesHelper::ObjCTypesHelper(CodeGen::CodeGenModule &cgm) // char *ivar_layout; // struct _objc_class_ext *ext; // }; - ClassTy->setBody(llvm::PointerType::getUnqual(ClassTy), - llvm::PointerType::getUnqual(ClassTy), Int8PtrTy, LongTy, - LongTy, LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, - ProtocolListPtrTy, Int8PtrTy, ClassExtensionPtrTy); + ClassTy = llvm::StructType::create( + {llvm::PointerType::getUnqual(VMContext), + llvm::PointerType::getUnqual(VMContext), Int8PtrTy, LongTy, LongTy, + LongTy, IvarListPtrTy, MethodListPtrTy, CachePtrTy, ProtocolListPtrTy, + Int8PtrTy, ClassExtensionPtrTy}, + "struct._objc_class"); ClassPtrTy = llvm::PointerType::getUnqual(ClassTy); @@ -5988,13 +5987,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // const struct _prop_list_t * class_properties; // } - // Holder for struct _protocol_list_t * - ProtocolListnfABITy = - llvm::StructType::create(VMContext, "struct._objc_protocol_list"); - ProtocolnfABITy = llvm::StructType::create( "struct._protocol_t", ObjectPtrTy, Int8PtrTy, - llvm::PointerType::getUnqual(ProtocolListnfABITy), MethodListnfABIPtrTy, + llvm::PointerType::getUnqual(VMContext), MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, MethodListnfABIPtrTy, PropertyListPtrTy, IntTy, IntTy, Int8PtrPtrTy, Int8PtrTy, PropertyListPtrTy); @@ -6006,8 +6001,9 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // long protocol_count; // Note, this is 32/64 bit // struct _protocol_t *[protocol_count]; // } - ProtocolListnfABITy->setBody(LongTy, - llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)); + ProtocolListnfABITy = llvm::StructType::create( + {LongTy, llvm::ArrayType::get(ProtocolnfABIPtrTy, 0)}, + "struct._objc_protocol_list"); // struct _objc_protocol_list* ProtocolListnfABIPtrTy = llvm::PointerType::getUnqual(ProtocolListnfABITy); @@ -6067,11 +6063,12 @@ ObjCNonFragileABITypesHelper::ObjCNonFragileABITypesHelper(CodeGen::CodeGenModul // struct class_ro_t *ro; // } - ClassnfABITy = llvm::StructType::create(VMContext, "struct._class_t"); - ClassnfABITy->setBody(llvm::PointerType::getUnqual(ClassnfABITy), - llvm::PointerType::getUnqual(ClassnfABITy), CachePtrTy, - llvm::PointerType::getUnqual(ImpnfABITy), - llvm::PointerType::getUnqual(ClassRonfABITy)); + ClassnfABITy = llvm::StructType::create( + {llvm::PointerType::getUnqual(VMContext), + llvm::PointerType::getUnqual(VMContext), CachePtrTy, + llvm::PointerType::getUnqual(ImpnfABITy), + llvm::PointerType::getUnqual(ClassRonfABITy)}, + "struct._class_t"); // LLVM for struct _class_t * ClassnfABIPtrTy = llvm::PointerType::getUnqual(ClassnfABITy); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2bcca5e85bdfeb..ba376f9ecfacde 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -7146,8 +7146,8 @@ void CodeGenModule::EmitTopLevelDecl(Decl *D) { // For C++ standard modules we are done - we will call the module // initializer for imported modules, and that will likewise call those for // any imports it has. - if (CXX20ModuleInits && Import->getImportedOwningModule() && - !Import->getImportedOwningModule()->isModuleMapModule()) + if (CXX20ModuleInits && Import->getImportedModule() && + Import->getImportedModule()->isNamedModule()) break; // For clang C++ module map modules the initializers for sub-modules are diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 0b0b45ffead92f..3802dc8bcafc49 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -529,31 +529,29 @@ class MicrosoftCXXABI : public CGCXXABI { if (ClassHierarchyDescriptorType) return ClassHierarchyDescriptorType; // Forward-declare RTTIClassHierarchyDescriptor to break a cycle. - ClassHierarchyDescriptorType = llvm::StructType::create( - CGM.getLLVMContext(), "rtti.ClassHierarchyDescriptor"); llvm::Type *FieldTypes[] = {CGM.IntTy, CGM.IntTy, CGM.IntTy, getImageRelativeType(CGM.UnqualPtrTy)}; - ClassHierarchyDescriptorType->setBody(FieldTypes); + ClassHierarchyDescriptorType = + llvm::StructType::create(FieldTypes, "rtti.ClassHierarchyDescriptor"); return ClassHierarchyDescriptorType; } llvm::StructType *getCompleteObjectLocatorType() { if (CompleteObjectLocatorType) return CompleteObjectLocatorType; - CompleteObjectLocatorType = llvm::StructType::create( - CGM.getLLVMContext(), "rtti.CompleteObjectLocator"); llvm::Type *FieldTypes[] = { CGM.IntTy, CGM.IntTy, CGM.IntTy, getImageRelativeType(CGM.Int8PtrTy), getImageRelativeType(CGM.UnqualPtrTy), - getImageRelativeType(CompleteObjectLocatorType), + getImageRelativeType(CGM.VoidTy), }; llvm::ArrayRef FieldTypesRef(FieldTypes); if (!isImageRelative()) FieldTypesRef = FieldTypesRef.drop_back(); - CompleteObjectLocatorType->setBody(FieldTypesRef); + CompleteObjectLocatorType = + llvm::StructType::create(FieldTypesRef, "rtti.CompleteObjectLocator"); return CompleteObjectLocatorType; } diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 34de0043ca012a..bdf3da0c96adca 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -109,7 +109,8 @@ ToolChain::ToolChain(const Driver &D, const llvm::Triple &T, llvm::Expected> ToolChain::executeToolChainProgram(StringRef Executable) const { llvm::SmallString<64> OutputFile; - llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile); + llvm::sys::fs::createTemporaryFile("toolchain-program", "txt", OutputFile, + llvm::sys::fs::OF_Text); llvm::FileRemover OutputRemover(OutputFile.c_str()); std::optional Redirects[] = { {""}, @@ -128,7 +129,8 @@ ToolChain::executeToolChainProgram(StringRef Executable) const { *Str + "'"); SecondsToWait = std::max(SecondsToWait, 0); // infinite } - if (llvm::sys::ExecuteAndWait(Executable, {}, {}, Redirects, SecondsToWait, + if (llvm::sys::ExecuteAndWait(Executable, {Executable}, {}, Redirects, + SecondsToWait, /*MemoryLimit=*/0, &ErrorMessage)) return llvm::createStringError(std::error_code(), Executable + ": " + ErrorMessage); diff --git a/clang/lib/Index/USRGeneration.cpp b/clang/lib/Index/USRGeneration.cpp index 35d0aefaf69a68..493123459a5a4d 100644 --- a/clang/lib/Index/USRGeneration.cpp +++ b/clang/lib/Index/USRGeneration.cpp @@ -62,20 +62,17 @@ namespace { class USRGenerator : public ConstDeclVisitor { SmallVectorImpl &Buf; llvm::raw_svector_ostream Out; - bool IgnoreResults; ASTContext *Context; - bool generatedLoc; + const LangOptions &LangOpts; + bool IgnoreResults = false; + bool generatedLoc = false; llvm::DenseMap TypeSubstitutions; public: - explicit USRGenerator(ASTContext *Ctx, SmallVectorImpl &Buf) - : Buf(Buf), - Out(Buf), - IgnoreResults(false), - Context(Ctx), - generatedLoc(false) - { + USRGenerator(ASTContext *Ctx, SmallVectorImpl &Buf, + const LangOptions &LangOpts) + : Buf(Buf), Out(Buf), Context(Ctx), LangOpts(LangOpts) { // Add the USR space prefix. Out << getUSRSpacePrefix(); } @@ -246,14 +243,13 @@ void USRGenerator::VisitFunctionDecl(const FunctionDecl *D) { } else Out << "@F@"; - PrintingPolicy Policy(Context->getLangOpts()); + PrintingPolicy Policy(LangOpts); // Forward references can have different template argument names. Suppress the // template argument names in constructors to make their USR more stable. Policy.SuppressTemplateArgsInCXXConstructors = true; D->getDeclName().print(Out, Policy); - ASTContext &Ctx = *Context; - if ((!Ctx.getLangOpts().CPlusPlus || D->isExternC()) && + if ((!LangOpts.CPlusPlus || D->isExternC()) && !D->hasAttr()) return; @@ -657,9 +653,10 @@ bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) { return IgnoreResults; } -static void printQualifier(llvm::raw_ostream &Out, ASTContext &Ctx, NestedNameSpecifier *NNS) { +static void printQualifier(llvm::raw_ostream &Out, const LangOptions &LangOpts, + NestedNameSpecifier *NNS) { // FIXME: Encode the qualifier, don't just print it. - PrintingPolicy PO(Ctx.getLangOpts()); + PrintingPolicy PO(LangOpts); PO.SuppressTagKeyword = true; PO.SuppressUnwrittenScope = true; PO.ConstantArraySizeAsWritten = false; @@ -948,7 +945,7 @@ void USRGenerator::VisitType(QualType T) { } if (const DependentNameType *DNT = T->getAs()) { Out << '^'; - printQualifier(Out, Ctx, DNT->getQualifier()); + printQualifier(Out, LangOpts, DNT->getQualifier()); Out << ':' << DNT->getIdentifier()->getName(); return; } @@ -1090,7 +1087,7 @@ void USRGenerator::VisitUnresolvedUsingValueDecl(const UnresolvedUsingValueDecl return; VisitDeclContext(D->getDeclContext()); Out << "@UUV@"; - printQualifier(Out, D->getASTContext(), D->getQualifier()); + printQualifier(Out, LangOpts, D->getQualifier()); EmitDeclName(D); } @@ -1099,7 +1096,7 @@ void USRGenerator::VisitUnresolvedUsingTypenameDecl(const UnresolvedUsingTypenam return; VisitDeclContext(D->getDeclContext()); Out << "@UUT@"; - printQualifier(Out, D->getASTContext(), D->getQualifier()); + printQualifier(Out, LangOpts, D->getQualifier()); Out << D->getName(); // Simple name. } @@ -1190,6 +1187,13 @@ bool clang::index::generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf) { if (!D) return true; + return generateUSRForDecl(D, Buf, D->getASTContext().getLangOpts()); +} + +bool clang::index::generateUSRForDecl(const Decl *D, SmallVectorImpl &Buf, + const LangOptions &LangOpts) { + if (!D) + return true; // We don't ignore decls with invalid source locations. Implicit decls, like // C++'s operator new function, can have invalid locations but it is fine to // create USRs that can identify them. @@ -1203,7 +1207,7 @@ bool clang::index::generateUSRForDecl(const Decl *D, return false; } } - USRGenerator UG(&D->getASTContext(), Buf); + USRGenerator UG(&D->getASTContext(), Buf, LangOpts); UG.Visit(D); return UG.ignoreResults(); } @@ -1240,11 +1244,17 @@ bool clang::index::generateUSRForMacro(StringRef MacroName, SourceLocation Loc, bool clang::index::generateUSRForType(QualType T, ASTContext &Ctx, SmallVectorImpl &Buf) { + return generateUSRForType(T, Ctx, Buf, Ctx.getLangOpts()); +} + +bool clang::index::generateUSRForType(QualType T, ASTContext &Ctx, + SmallVectorImpl &Buf, + const LangOptions &LangOpts) { if (T.isNull()) return true; T = T.getCanonicalType(); - USRGenerator UG(&Ctx, Buf); + USRGenerator UG(&Ctx, Buf, LangOpts); UG.VisitType(T); return UG.ignoreResults(); } diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 7d727efb228731..6470e55e521add 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -1243,6 +1243,7 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { ParsedStmtContext::Compound | (isStmtExpr ? ParsedStmtContext::InStmtExpr : ParsedStmtContext()); + bool LastIsError = false; while (!tryParseMisplacedModuleImport() && Tok.isNot(tok::r_brace) && Tok.isNot(tok::eof)) { if (Tok.is(tok::annot_pragma_unused)) { @@ -1299,7 +1300,15 @@ StmtResult Parser::ParseCompoundStatementBody(bool isStmtExpr) { if (R.isUsable()) Stmts.push_back(R.get()); + LastIsError = R.isInvalid(); } + // StmtExpr needs to do copy initialization for last statement. + // If last statement is invalid, the last statement in `Stmts` will be + // incorrect. Then the whole compound statement should also be marked as + // invalid to prevent subsequent errors. + if (isStmtExpr && LastIsError && !Stmts.empty()) + return StmtError(); + // Warn the user that using option `-ffp-eval-method=source` on a // 32-bit target and feature `sse` disabled, or using // `pragma clang fp eval_method=source` and feature `sse` disabled, is not diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 3308b898a5b68f..dae271c1ff5001 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -8900,7 +8900,12 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << Call->getCallee()->getSourceRange()); else if (const auto *RT = PointeeTy->getAs()) { - bool IsTriviallyCopyableCXXRecord = + // FIXME: Do not consider incomplete types even though they may be + // completed later. GCC does not diagnose such code, but we may want to + // consider diagnosing it in the future, perhaps under a different, but + // related, diagnostic group. + bool MayBeTriviallyCopyableCXXRecord = + RT->isIncompleteType() || RT->desugar().isTriviallyCopyableType(Context); if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && @@ -8910,7 +8915,7 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << 0); SearchNonTrivialToInitializeField::diag(PointeeTy, Dest, *this); } else if ((BId == Builtin::BImemset || BId == Builtin::BIbzero) && - !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) { // FIXME: Limiting this warning to dest argument until we decide // whether it's valid for source argument too. DiagRuntimeBehavior(Dest->getExprLoc(), Dest, @@ -8923,7 +8928,7 @@ void Sema::CheckMemaccessArguments(const CallExpr *Call, << ArgIdx << FnName << PointeeTy << 1); SearchNonTrivialToCopyField::diag(PointeeTy, Dest, *this); } else if ((BId == Builtin::BImemcpy || BId == Builtin::BImemmove) && - !IsTriviallyCopyableCXXRecord && ArgIdx == 0) { + !MayBeTriviallyCopyableCXXRecord && ArgIdx == 0) { // FIXME: Limiting this warning to dest argument until we decide // whether it's valid for source argument too. DiagRuntimeBehavior(Dest->getExprLoc(), Dest, diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index fcf05798d9c709..4503e60cff8c2f 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -4381,8 +4381,20 @@ Sema::CheckVarTemplateId(VarTemplateDecl *Template, SourceLocation TemplateLoc, SmallVector PartialSpecs; Template->getPartialSpecializations(PartialSpecs); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { - VarTemplatePartialSpecializationDecl *Partial = PartialSpecs[I]; + for (VarTemplatePartialSpecializationDecl *Partial : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the partial + // specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are still + // considered for this specialization of the enclosing class template. + if (Template->getMostRecentDecl()->isMemberSpecialization() && + !Partial->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(FailedCandidates.getLocation()); if (TemplateDeductionResult Result = diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index dea97bfce532c9..b63063813f1b56 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -3978,11 +3978,24 @@ bool Sema::usesPartialOrExplicitSpecialization( return true; SmallVector PartialSpecs; - ClassTemplateSpec->getSpecializedTemplate() - ->getPartialSpecializations(PartialSpecs); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { + ClassTemplateDecl *CTD = ClassTemplateSpec->getSpecializedTemplate(); + CTD->getPartialSpecializations(PartialSpecs); + for (ClassTemplatePartialSpecializationDecl *CTPSD : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the partial + // specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are still + // considered for this specialization of the enclosing class template. + if (CTD->getMostRecentDecl()->isMemberSpecialization() && + !CTPSD->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(Loc); - if (DeduceTemplateArguments(PartialSpecs[I], + if (DeduceTemplateArguments(CTPSD, ClassTemplateSpec->getTemplateArgs().asArray(), Info) == TemplateDeductionResult::Success) return true; @@ -4025,8 +4038,21 @@ getPatternForClassTemplateSpecialization( SmallVector PartialSpecs; Template->getPartialSpecializations(PartialSpecs); TemplateSpecCandidateSet FailedCandidates(PointOfInstantiation); - for (unsigned I = 0, N = PartialSpecs.size(); I != N; ++I) { - ClassTemplatePartialSpecializationDecl *Partial = PartialSpecs[I]; + for (ClassTemplatePartialSpecializationDecl *Partial : PartialSpecs) { + // C++ [temp.spec.partial.member]p2: + // If the primary member template is explicitly specialized for a given + // (implicit) specialization of the enclosing class template, the + // partial specializations of the member template are ignored for this + // specialization of the enclosing class template. If a partial + // specialization of the member template is explicitly specialized for a + // given (implicit) specialization of the enclosing class template, the + // primary member template and its other partial specializations are + // still considered for this specialization of the enclosing class + // template. + if (Template->getMostRecentDecl()->isMemberSpecialization() && + !Partial->getMostRecentDecl()->isMemberSpecialization()) + continue; + TemplateDeductionInfo Info(FailedCandidates.getLocation()); if (TemplateDeductionResult Result = S.DeduceTemplateArguments( Partial, ClassTemplateSpec->getTemplateArgs().asArray(), Info); diff --git a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp index 5cdf047738abcb..76a4599cc8d788 100644 --- a/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/WebKit/UncountedLocalVarsChecker.cpp @@ -48,6 +48,65 @@ bool isRefcountedStringsHack(const VarDecl *V) { return false; } +struct GuardianVisitor : public RecursiveASTVisitor { + using Base = RecursiveASTVisitor; + + const VarDecl *Guardian{nullptr}; + +public: + explicit GuardianVisitor(const VarDecl *Guardian) : Guardian(Guardian) { + assert(Guardian); + } + + bool VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->isAssignmentOp()) { + if (auto *VarRef = dyn_cast(BO->getLHS())) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } + + bool VisitCXXConstructExpr(const CXXConstructExpr *CE) { + if (auto *Ctor = CE->getConstructor()) { + if (Ctor->isMoveConstructor() && CE->getNumArgs() == 1) { + auto *Arg = CE->getArg(0)->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(Arg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + } + return true; + } + + bool VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { + auto MethodName = safeGetName(MCE->getMethodDecl()); + if (MethodName == "swap" || MethodName == "leakRef" || + MethodName == "releaseNonNull") { + auto *ThisArg = MCE->getImplicitObjectArgument()->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(ThisArg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } + + bool VisitCXXOperatorCallExpr(const CXXOperatorCallExpr *OCE) { + if (OCE->isAssignmentOp()) { + assert(OCE->getNumArgs() == 2); + auto *ThisArg = OCE->getArg(0)->IgnoreParenCasts(); + if (auto *VarRef = dyn_cast(ThisArg)) { + if (VarRef->getDecl() == Guardian) + return false; + } + } + return true; + } +}; + bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, const VarDecl *MaybeGuardian) { assert(Guarded); @@ -81,7 +140,7 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, // We need to skip the first CompoundStmt to avoid situation when guardian is // defined in the same scope as guarded variable. - bool HaveSkippedFirstCompoundStmt = false; + const CompoundStmt *FirstCompondStmt = nullptr; for (DynTypedNodeList guardedVarAncestors = ctx.getParents(*Guarded); !guardedVarAncestors.empty(); guardedVarAncestors = ctx.getParents( @@ -90,12 +149,15 @@ bool isGuardedScopeEmbeddedInGuardianScope(const VarDecl *Guarded, ) { for (auto &guardedVarAncestor : guardedVarAncestors) { if (auto *CStmtAncestor = guardedVarAncestor.get()) { - if (!HaveSkippedFirstCompoundStmt) { - HaveSkippedFirstCompoundStmt = true; + if (!FirstCompondStmt) { + FirstCompondStmt = CStmtAncestor; continue; } - if (CStmtAncestor == guardiansClosestCompStmtAncestor) - return true; + if (CStmtAncestor == guardiansClosestCompStmtAncestor) { + GuardianVisitor guardianVisitor(MaybeGuardian); + auto *GuardedScope = const_cast(FirstCompondStmt); + return guardianVisitor.TraverseCompoundStmt(GuardedScope); + } } } } diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StdSpecialSymbolMap.inc b/clang/lib/Tooling/Inclusions/Stdlib/StdSpecialSymbolMap.inc index 0d351d688a3296..4d466013eeac3f 100644 --- a/clang/lib/Tooling/Inclusions/Stdlib/StdSpecialSymbolMap.inc +++ b/clang/lib/Tooling/Inclusions/Stdlib/StdSpecialSymbolMap.inc @@ -367,6 +367,11 @@ SYMBOL(any_cast, std::, ) SYMBOL(div, std::, ) SYMBOL(abort, std::, ) +SYMBOL(binary_search, std::ranges::, ) +SYMBOL(equal_range, std::ranges::, ) +SYMBOL(lower_bound, std::ranges::, ) +SYMBOL(upper_bound, std::ranges::, ) + // These are C symbols that are not under std namespace. SYMBOL(localtime_r, None, ) SYMBOL(localtime_r, None, ) diff --git a/clang/test/Analysis/Checkers/WebKit/mock-types.h b/clang/test/Analysis/Checkers/WebKit/mock-types.h index 8d8a90f0afae0e..82c79c97a83de6 100644 --- a/clang/test/Analysis/Checkers/WebKit/mock-types.h +++ b/clang/test/Analysis/Checkers/WebKit/mock-types.h @@ -49,7 +49,23 @@ template , typename RefDerefTra Ref() : t{} {}; Ref(T &t) : t(&RefDerefTraits::ref(t)) { } Ref(const Ref& o) : t(RefDerefTraits::refIfNotNull(PtrTraits::unwrap(o.t))) { } + Ref(Ref&& o) : t(o.leakRef()) { } ~Ref() { RefDerefTraits::derefIfNotNull(PtrTraits::exchange(t, nullptr)); } + Ref& operator=(T &t) { + Ref o(t); + swap(o); + return *this; + } + Ref& operator=(Ref &&o) { + Ref m(o); + swap(m); + return *this; + } + void swap(Ref& o) { + typename PtrTraits::StorageType tmp = t; + t = o.t; + o.t = tmp; + } T &get() { return *PtrTraits::unwrap(t); } T *ptr() { return PtrTraits::unwrap(t); } T *operator->() { return PtrTraits::unwrap(t); } @@ -74,11 +90,27 @@ template struct RefPtr { if (t) t->deref(); } + Ref releaseNonNull() { + Ref tmp(*t); + if (t) + t->deref(); + t = nullptr; + return tmp; + } + void swap(RefPtr& o) { + T* tmp = t; + t = o.t; + o.t = tmp; + } T *get() { return t; } T *operator->() { return t; } const T *operator->() const { return t; } T &operator*() { return *t; } - RefPtr &operator=(T *) { return *this; } + RefPtr &operator=(T *t) { + RefPtr o(t); + swap(o); + return *this; + } operator bool() const { return t; } }; diff --git a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp index 1c0df42cdda663..d7fb689557a6fc 100644 --- a/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp +++ b/clang/test/Analysis/Checkers/WebKit/uncounted-local-vars.cpp @@ -83,6 +83,83 @@ void foo7(RefCountable* obj) { bar.obj->method(); } +void foo8(RefCountable* obj) { + RefPtr foo; + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = nullptr; + bar->method(); + } + RefPtr baz; + { + RefCountable *bar = baz.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + baz = obj; + bar->method(); + } + foo = nullptr; + { + RefCountable *bar = foo.get(); + // No warning. It's okay to mutate RefPtr in an outer scope. + bar->method(); + } + foo = obj; + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo.releaseNonNull(); + bar->method(); + } + { + RefCountable *bar = foo.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = obj ? obj : nullptr; + bar->method(); + } + { + RefCountable *bar = foo->trivial() ? foo.get() : nullptr; + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + foo = nullptr; + bar->method(); + } +} + +void foo9(RefCountable& o) { + Ref guardian(o); + { + RefCountable &bar = guardian.get(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian = o; // We don't detect that we're setting it to the same value. + bar.method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + Ref other(*bar); // We don't detect other has the same value as guardian. + guardian.swap(other); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + Ref other(static_cast&&>(guardian)); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian.leakRef(); + bar->method(); + } + { + RefCountable *bar = guardian.ptr(); + // expected-warning@-1{{Local variable 'bar' is uncounted and unsafe [alpha.webkit.UncountedLocalVarsChecker]}} + guardian = o.trivial() ? o : *bar; + bar->method(); + } +} + } // namespace guardian_scopes namespace auto_keyword { diff --git a/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp b/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp new file mode 100644 index 00000000000000..7969b7efe597f9 --- /dev/null +++ b/clang/test/CXX/temp/temp.decls/temp.spec.partial/temp.spec.partial.member/p2.cpp @@ -0,0 +1,85 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s +// expected-no-diagnostics + +template +struct A { + template + struct B { + static constexpr int y = 0; + }; + + template + struct B { + static constexpr int y = 1; + }; + + template + static constexpr int x = 0; + + template + static constexpr int x = 1; +}; + +template +template +struct A::B { + static constexpr int y = 2; +}; + +template +template +constexpr int A::x = 2; + +static_assert(A::B::y == 0); +static_assert(A::B::y == 1); +static_assert(A::B::y == 2); +static_assert(A::x == 0); +static_assert(A::x == 1); +static_assert(A::x == 2); + +template<> +template +struct A::B { + static constexpr int y = 3; +}; + +template<> +template +struct A::B { + static constexpr int y = 4; +}; + +template<> +template +struct A::B { + static constexpr int y = 5; +}; + +template<> +template +constexpr int A::x = 3; + +template<> +template +constexpr int A::x = 4; + +template<> +template +constexpr int A::x = 5; + +static_assert(A::B::y == 3); +static_assert(A::B::y == 3); +static_assert(A::B::y == 3); +static_assert(A::B::y == 4); +static_assert(A::x == 3); +static_assert(A::x == 3); +static_assert(A::x == 3); +static_assert(A::x == 4); +static_assert(A::B::y == 0); +static_assert(A::B::y == 1); +static_assert(A::B::y == 2); +static_assert(A::B::y == 5); +static_assert(A::x == 0); +static_assert(A::x == 1); +static_assert(A::x == 2); +static_assert(A::x == 5); diff --git a/clang/test/CodeGen/aarch64-cpu-supports-target.c b/clang/test/CodeGen/aarch64-cpu-supports-target.c index 5186cab92a921d..e3a75e9a1fc7d3 100644 --- a/clang/test/CodeGen/aarch64-cpu-supports-target.c +++ b/clang/test/CodeGen/aarch64-cpu-supports-target.c @@ -9,9 +9,9 @@ int check_all_feature() { return 3; else if (__builtin_cpu_supports("fcma+rcpc+rcpc2+rcpc3+frintts+dgh")) return 4; - else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve+sve-bf16")) + else if (__builtin_cpu_supports("i8mm+bf16+ebf16+rpres+sve")) return 5; - else if (__builtin_cpu_supports("sve-ebf16+sve-i8mm+f32mm+f64mm")) + else if (__builtin_cpu_supports("sve+ebf16+i8mm+f32mm+f64mm")) return 6; else if (__builtin_cpu_supports("sve2+sve2-aes+sve2-pmull128")) return 7; diff --git a/clang/test/CodeGen/aarch64-fmv-dependencies.c b/clang/test/CodeGen/aarch64-fmv-dependencies.c index 6d230007f91ff9..db6be423b99f78 100644 --- a/clang/test/CodeGen/aarch64-fmv-dependencies.c +++ b/clang/test/CodeGen/aarch64-fmv-dependencies.c @@ -135,15 +135,6 @@ __attribute__((target_version("ssbs"))) int fmv(void) { return 0; } // CHECK: define dso_local i32 @fmv._Msve() #[[sve:[0-9]+]] { __attribute__((target_version("sve"))) int fmv(void) { return 0; } -// CHECK: define dso_local i32 @fmv._Msve-bf16() #[[sve_bf16_ebf16:[0-9]+]] { -__attribute__((target_version("sve-bf16"))) int fmv(void) { return 0; } - -// CHECK: define dso_local i32 @fmv._Msve-ebf16() #[[sve_bf16_ebf16:[0-9]+]] { -__attribute__((target_version("sve-ebf16"))) int fmv(void) { return 0; } - -// CHECK: define dso_local i32 @fmv._Msve-i8mm() #[[sve_i8mm:[0-9]+]] { -__attribute__((target_version("sve-i8mm"))) int fmv(void) { return 0; } - // CHECK: define dso_local i32 @fmv._Msve2() #[[sve2:[0-9]+]] { __attribute__((target_version("sve2"))) int fmv(void) { return 0; } @@ -209,8 +200,6 @@ int caller() { // CHECK: attributes #[[sme2]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+neon,+outline-atomics,+sme,+sme2,+v8a" // CHECK: attributes #[[ssbs]] = { {{.*}} "target-features"="+fp-armv8,+neon,+outline-atomics,+ssbs,+v8a" // CHECK: attributes #[[sve]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" -// CHECK: attributes #[[sve_bf16_ebf16]] = { {{.*}} "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+v8a" -// CHECK: attributes #[[sve_i8mm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+i8mm,+neon,+outline-atomics,+sve,+v8a" // CHECK: attributes #[[sve2]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+v8a" // CHECK: attributes #[[sve2_aes]] = { {{.*}} "target-features"="+aes,+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-aes,+v8a" // CHECK: attributes #[[sve2_bitperm]] = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+outline-atomics,+sve,+sve2,+sve2-bitperm,+v8a" diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index dc0cc429abffd1..cd09e05b25e4cd 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -27,11 +27,11 @@ int foo() { inline int __attribute__((target_version("sha2+aes+f64mm"))) fmv_inline(void) { return 1; } inline int __attribute__((target_version("fp16+fcma+rdma+sme+ fp16 "))) fmv_inline(void) { return 2; } inline int __attribute__((target_version("sha3+i8mm+f32mm"))) fmv_inline(void) { return 12; } -inline int __attribute__((target_version("dit+sve-ebf16"))) fmv_inline(void) { return 8; } +inline int __attribute__((target_version("dit+ebf16"))) fmv_inline(void) { return 8; } inline int __attribute__((target_version("dpb+rcpc2 "))) fmv_inline(void) { return 6; } inline int __attribute__((target_version(" dpb2 + jscvt"))) fmv_inline(void) { return 7; } inline int __attribute__((target_version("rcpc+frintts"))) fmv_inline(void) { return 3; } -inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { return 4; } +inline int __attribute__((target_version("sve+bf16"))) fmv_inline(void) { return 4; } inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; } inline int __attribute__((target_version("sve2+sve2-aes+sve2-bitperm"))) fmv_inline(void) { return 9; } inline int __attribute__((target_version("sve2-sm4+memtag"))) fmv_inline(void) { return 10; } @@ -680,7 +680,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMsve-ebf16 +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MditMebf16 // CHECK-SAME: () #[[ATTR28:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 8 @@ -708,7 +708,7 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // // // CHECK: Function Attrs: noinline nounwind optnone -// CHECK-LABEL: define {{[^@]+}}@fmv_inline._MsveMsve-bf16 +// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mbf16Msve // CHECK-SAME: () #[[ATTR32:[0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: ret i32 4 @@ -837,20 +837,20 @@ int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NEXT: ret ptr @fmv_inline._Msve2-aesMsve2-sha3 // CHECK: resolver_else12: // CHECK-NEXT: [[TMP28:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 4295098368 -// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 4295098368 +// CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 1207959552 +// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[TMP29]], 1207959552 // CHECK-NEXT: [[TMP31:%.*]] = and i1 true, [[TMP30]] // CHECK-NEXT: br i1 [[TMP31]], label [[RESOLVER_RETURN13:%.*]], label [[RESOLVER_ELSE14:%.*]] // CHECK: resolver_return13: -// CHECK-NEXT: ret ptr @fmv_inline._MditMsve-ebf16 +// CHECK-NEXT: ret ptr @fmv_inline._Mbf16Msve // CHECK: resolver_else14: // CHECK-NEXT: [[TMP32:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 -// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 3221225472 -// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 3221225472 +// CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 268566528 +// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i64 [[TMP33]], 268566528 // CHECK-NEXT: [[TMP35:%.*]] = and i1 true, [[TMP34]] // CHECK-NEXT: br i1 [[TMP35]], label [[RESOLVER_RETURN15:%.*]], label [[RESOLVER_ELSE16:%.*]] // CHECK: resolver_return15: -// CHECK-NEXT: ret ptr @fmv_inline._MsveMsve-bf16 +// CHECK-NEXT: ret ptr @fmv_inline._MditMebf16 // CHECK: resolver_else16: // CHECK-NEXT: [[TMP36:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8 // CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 20971520 diff --git a/clang/test/CodeGen/pgo-cold-function-coverage.c b/clang/test/CodeGen/pgo-cold-function-coverage.c index fd1e1e7e14cda5..3003cdc3e15e02 100644 --- a/clang/test/CodeGen/pgo-cold-function-coverage.c +++ b/clang/test/CodeGen/pgo-cold-function-coverage.c @@ -1,7 +1,7 @@ // Test -fprofile-generate-cold-function-coverage // RUN: rm -rf %t && split-file %s %t -// RUN: %clang -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s +// RUN: %clang --target=x86_64 -O2 -fprofile-generate-cold-function-coverage=/xxx/yyy/ -fprofile-sample-accurate -fprofile-sample-use=%t/pgo-cold-func.prof -S -emit-llvm -o - %t/pgo-cold-func.c | FileCheck %s // CHECK: @__llvm_profile_filename = {{.*}} c"/xxx/yyy/default_%m.profraw\00" diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl deleted file mode 100644 index e1e047485e4df0..00000000000000 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-annotations.hlsl +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -RWBuffer Buffer1; -RWBuffer > BufferArray[4]; - -RWBuffer Buffer2 : register(u3); -RWBuffer > BufferArray2[4] : register(u4); - -RWBuffer Buffer3 : register(u3, space1); -RWBuffer > BufferArray3[4] : register(u4, space1); - - - -[numthreads(1,1,1)] -void main() { -} - -// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl index fa81b53fd9bddc..16120a44a9e4d2 100644 --- a/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { BufF16x2[GI] = 0; BufF32x3[GI] = 0; } - -// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl index 727f416cde57fd..71b5b7a75fa431 100644 --- a/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/RWStructuredBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { BufF16x2[GI] = 0; BufF32x3[GI] = 0; } - -// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl deleted file mode 100644 index 5155f129025979..00000000000000 --- a/clang/test/CodeGenHLSL/builtins/RasterizerOrderedBuffer-annotations.hlsl +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-pixel -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -RasterizerOrderedBuffer Buffer1; -RasterizerOrderedBuffer > BufferArray[4]; - -RasterizerOrderedBuffer Buffer2 : register(u3); -RasterizerOrderedBuffer > BufferArray2[4] : register(u4); - -RasterizerOrderedBuffer Buffer3 : register(u3, space1); -RasterizerOrderedBuffer > BufferArray3[4] : register(u4, space1); - -void main() {} - -// CHECK: !hlsl.uavs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 true, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 true, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 true, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 true, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 true, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 true, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl deleted file mode 100644 index a88ea774f33201..00000000000000 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-annotations.hlsl +++ /dev/null @@ -1,22 +0,0 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s - -StructuredBuffer Buffer1; -StructuredBuffer > BufferArray[4]; - -StructuredBuffer Buffer2 : register(t3); -StructuredBuffer > BufferArray2[4] : register(t4); - -StructuredBuffer Buffer3 : register(t3, space1); -StructuredBuffer > BufferArray3[4] : register(t4, space1); - -[numthreads(1,1,1)] -void main() { -} - -// CHECK: !hlsl.srvs = !{![[Single:[0-9]+]], ![[Array:[0-9]+]], ![[SingleAllocated:[0-9]+]], ![[ArrayAllocated:[0-9]+]], ![[SingleSpace:[0-9]+]], ![[ArraySpace:[0-9]+]]} -// CHECK-DAG: ![[Single]] = !{ptr @Buffer1, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[Array]] = !{ptr @BufferArray, i32 10, i32 9, i1 false, i32 -1, i32 0} -// CHECK-DAG: ![[SingleAllocated]] = !{ptr @Buffer2, i32 10, i32 9, i1 false, i32 3, i32 0} -// CHECK-DAG: ![[ArrayAllocated]] = !{ptr @BufferArray2, i32 10, i32 9, i1 false, i32 4, i32 0} -// CHECK-DAG: ![[SingleSpace]] = !{ptr @Buffer3, i32 10, i32 9, i1 false, i32 3, i32 1} -// CHECK-DAG: ![[ArraySpace]] = !{ptr @BufferArray3, i32 10, i32 9, i1 false, i32 4, i32 1} diff --git a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl index 4c30119498ff1a..205e13b4de3946 100644 --- a/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl +++ b/clang/test/CodeGenHLSL/builtins/StructuredBuffer-elementtype.hlsl @@ -54,17 +54,3 @@ void main(int GI : SV_GroupIndex) { half2 v12 = BufF16x2[GI]; float3 v13 = BufF32x3[GI]; } - -// CHECK: !{{[0-9]+}} = !{ptr @BufI16, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU16, i32 10, i32 3, -// CHECK: !{{[0-9]+}} = !{ptr @BufI32, i32 10, i32 4, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufI64, i32 10, i32 6, -// CHECK: !{{[0-9]+}} = !{ptr @BufU64, i32 10, i32 7, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32, i32 10, i32 9, -// CHECK: !{{[0-9]+}} = !{ptr @BufF64, i32 10, i32 10, -// CHECK: !{{[0-9]+}} = !{ptr @BufI16x4, i32 10, i32 2, -// CHECK: !{{[0-9]+}} = !{ptr @BufU32x3, i32 10, i32 5, -// CHECK: !{{[0-9]+}} = !{ptr @BufF16x2, i32 10, i32 8, -// CHECK: !{{[0-9]+}} = !{ptr @BufF32x3, i32 10, i32 9, diff --git a/clang/test/CodeGenHLSL/cbuf.hlsl b/clang/test/CodeGenHLSL/cbuf.hlsl index 78d9768b22fc87..3f9d4514967dd2 100644 --- a/clang/test/CodeGenHLSL/cbuf.hlsl +++ b/clang/test/CodeGenHLSL/cbuf.hlsl @@ -23,6 +23,4 @@ float foo() { } // CHECK: !hlsl.cbufs = !{![[CBMD:[0-9]+]]} -// CHECK: !hlsl.srvs = !{![[TBMD:[0-9]+]]} // CHECK: ![[CBMD]] = !{ptr @[[CB]], i32 13, i32 0, i1 false, i32 0, i32 2} -// CHECK: ![[TBMD]] = !{ptr @[[TB]], i32 15, i32 0, i1 false, i32 2, i32 1} diff --git a/clang/test/Driver/nvlink-wrapper.c b/clang/test/Driver/nvlink-wrapper.c index 2b0993caee4248..79f4a6641732f7 100644 --- a/clang/test/Driver/nvlink-wrapper.c +++ b/clang/test/Driver/nvlink-wrapper.c @@ -21,12 +21,13 @@ int bar() { } #else extern int y; -int __attribute__((visibility("hidden"))) x = 999; +extern int x; int baz() { return y + x; } #endif // Create various inputs to test basic linking and LTO capabilities. Creating a // CUDA binary requires access to the `ptxas` executable, so we just use x64. +// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DX -o %t-x.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DY -o %t-y.o // RUN: %clang -cc1 %s -triple x86_64-unknown-linux-gnu -emit-obj -DZ -o %t-z.o @@ -36,6 +37,7 @@ int baz() { return y + x; } // RUN: llvm-ar rcs %t-y.a %t-y.o // RUN: llvm-ar rcs %t-z.a %t-z.o // RUN: llvm-ar rcs %t-w.a %t-w.o +// RUN: llvm-ar rcs %t-u.a %t-u.o // // Check that we forward any unrecognized argument to 'nvlink'. @@ -49,11 +51,16 @@ int baz() { return y + x; } // `libx.a` and `liby.a` because extern weak symbols do not extract and `libz.a` // is not used at all. // -// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.o %t-y.a %t-z.a %t-w.a \ +// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \ // RUN: -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LINK // LINK: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin -// RUN: %clang -cc1 %s -triple nvptx64-nvidia-cuda -emit-llvm-bc -o %t.o +// +// Same as above but we use '--undefined' to forcibly extract 'libz.a' +// +// RUN: clang-nvlink-wrapper --dry-run %t-x.a %t-u.a %t-y.a %t-z.a %t-w.a %t.o \ +// RUN: -u z -arch sm_52 -o a.out 2>&1 | FileCheck %s --check-prefix=LINK +// UNDEFINED: nvlink{{.*}} -arch sm_52 -o a.out [[INPUT:.+]].cubin {{.*}}-x-{{.*}}.cubin{{.*}}-y-{{.*}}.cubin{{.*}}-z-{{.*}}.cubin // // Check that the LTO interface works and properly preserves symbols used in a diff --git a/clang/test/Driver/riscv-profiles.c b/clang/test/Driver/riscv-profiles.c index 2b4d19422874cf..67e09d0e69ebc3 100644 --- a/clang/test/Driver/riscv-profiles.c +++ b/clang/test/Driver/riscv-profiles.c @@ -147,6 +147,7 @@ // RVA23U64: "-target-feature" "+zvbb" // RVA23U64: "-target-feature" "+zvfhmin" // RVA23U64: "-target-feature" "+zvkt" +// RVA23U64: "-target-feature" "+supm" // RUN: %clang --target=riscv64 -### -c %s 2>&1 -march=rva23s64 \ // RUN: | FileCheck -check-prefix=RVA23S64 %s @@ -186,6 +187,7 @@ // RVA23S64: "-target-feature" "+zvbb" // RVA23S64: "-target-feature" "+zvfhmin" // RVA23S64: "-target-feature" "+zvkt" +// RVA23S64: "-target-feature" "+sha" // RVA23S64: "-target-feature" "+shcounterenw" // RVA23S64: "-target-feature" "+shgatpa" // RVA23S64: "-target-feature" "+shtvala" @@ -201,6 +203,7 @@ // RVA23S64: "-target-feature" "+sstvala" // RVA23S64: "-target-feature" "+sstvecd" // RVA23S64: "-target-feature" "+ssu64xl" +// RVA23S64: "-target-feature" "+supm" // RVA23S64: "-target-feature" "+svade" // RVA23S64: "-target-feature" "+svbare" // RVA23S64: "-target-feature" "+svinval" diff --git a/clang/test/Modules/static-initializer.cppm b/clang/test/Modules/static-initializer.cppm new file mode 100644 index 00000000000000..10d4854ee67fa6 --- /dev/null +++ b/clang/test/Modules/static-initializer.cppm @@ -0,0 +1,18 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 %t/a.cpp -fmodule-file=a=%t/a.pcm -emit-llvm -o - | FileCheck %t/a.cpp + +//--- a.cppm +export module a; +int func(); +static int a = func(); + +//--- a.cpp +import a; + +// CHECK-NOT: internal global +// CHECK-NOT: __cxx_global_var_init + diff --git a/clang/test/Sema/attr-target-clones-aarch64.c b/clang/test/Sema/attr-target-clones-aarch64.c index a723c5965c5bcd..e101fefd2b67c4 100644 --- a/clang/test/Sema/attr-target-clones-aarch64.c +++ b/clang/test/Sema/attr-target-clones-aarch64.c @@ -7,7 +7,7 @@ void __attribute__((target_clones("default+sha3"))) warn1(void); // expected-error@+2 {{'target_clones' and 'target_version' attributes are not compatible}} // expected-note@+1 {{conflicting attribute is here}} -void __attribute__((target_version("sve-bf16"), target_clones("sme+memtag"))) not_compat(void); +void __attribute__((target_version("sve"), target_clones("sme+memtag"))) not_compat(void); int redecl(void); int __attribute__((target_clones("frintts", "simd+fp", "default"))) redecl(void) { return 1; } @@ -78,4 +78,4 @@ int useage(void) { // expected-error@+1 {{function declaration cannot become a multiversioned function after first usage}} int __attribute__((target_clones("sve2-sha3+ssbs", "sm4"))) mv_after_use(void) { return 1; } // expected-error@+1 {{'main' cannot be a multiversioned function}} -int __attribute__((target_clones("sve-i8mm"))) main() { return 1; } +int __attribute__((target_clones("i8mm"))) main() { return 1; } diff --git a/clang/test/SemaCXX/attr-target-version.cpp b/clang/test/SemaCXX/attr-target-version.cpp index 2c85f9735a87b1..c0a645713b2187 100644 --- a/clang/test/SemaCXX/attr-target-version.cpp +++ b/clang/test/SemaCXX/attr-target-version.cpp @@ -49,7 +49,7 @@ double __attribute__((target_version("rcpc"))) diff_type1(void); auto __attribute__((target_version("rcpc2"))) diff_type2(void) -> int { return 1; } //expected-error@+1 {{multiversioned function declaration has a different return type}} -auto __attribute__((target_version("sve-bf16"))) diff_type2(void) -> long { return (long)1; } +auto __attribute__((target_version("bf16"))) diff_type2(void) -> long { return (long)1; } int __attribute__((target_version("fp16fml"))) diff_type3(void) noexcept(false) { return 1; } //expected-error@+2 {{exception specification in declaration does not match previous declaration}} @@ -75,7 +75,7 @@ auto __attribute__((target_version("dpb2"))) ret3(void) -> int { return 1; } class Cls { __attribute__((target_version("rng"))) Cls(); // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support constructors}} - __attribute__((target_version("sve-i8mm"))) ~Cls(); + __attribute__((target_version("i8mm"))) ~Cls(); // expected-error@-1 {{attribute 'target_version' multiversioned functions do not yet support destructors}} Cls &__attribute__((target_version("f32mm"))) operator=(const Cls &) = default; diff --git a/clang/test/SemaCXX/constexpr-string.cpp b/clang/test/SemaCXX/constexpr-string.cpp index 5448365489a514..c456740ef7551f 100644 --- a/clang/test/SemaCXX/constexpr-string.cpp +++ b/clang/test/SemaCXX/constexpr-string.cpp @@ -670,8 +670,6 @@ namespace MemcpyEtc { constexpr bool test_address_of_incomplete_struct_type() { // expected-error {{never produces a constant}} struct Incomplete; extern Incomplete x, y; - // expected-warning@+2 {{first argument in call to '__builtin_memcpy' is a pointer to non-trivially copyable type 'Incomplete'}} - // expected-note@+1 {{explicitly cast the pointer to silence this warning}} __builtin_memcpy(&x, &x, 4); // expected-note@-1 2{{cannot constant evaluate 'memcpy' between objects of incomplete type 'Incomplete'}} return true; diff --git a/clang/test/SemaCXX/gh113468.cpp b/clang/test/SemaCXX/gh113468.cpp new file mode 100644 index 00000000000000..94551986b0efaa --- /dev/null +++ b/clang/test/SemaCXX/gh113468.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s + +constexpr int expr() { + if (({ + int f; + f = 0; + if (f) + break; // expected-error {{'break' statement not in loop or switch statement}} + })) + return 2; + return 1; +} diff --git a/clang/test/SemaCXX/warn-memaccess.cpp b/clang/test/SemaCXX/warn-memaccess.cpp index b4b7f6a6905b23..070b44891a91aa 100644 --- a/clang/test/SemaCXX/warn-memaccess.cpp +++ b/clang/test/SemaCXX/warn-memaccess.cpp @@ -7,12 +7,17 @@ extern "C" void *memcpy(void *s1, const void *s2, unsigned n); class TriviallyCopyable {}; class NonTriviallyCopyable { NonTriviallyCopyable(const NonTriviallyCopyable&);}; +struct Incomplete; void test_bzero(TriviallyCopyable* tc, - NonTriviallyCopyable *ntc) { + NonTriviallyCopyable *ntc, + Incomplete* i) { // OK bzero(tc, sizeof(*tc)); + // OK + bzero(i, 10); + // expected-warning@+2{{first argument in call to 'bzero' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} bzero(ntc, sizeof(*ntc)); @@ -22,10 +27,14 @@ void test_bzero(TriviallyCopyable* tc, } void test_memset(TriviallyCopyable* tc, - NonTriviallyCopyable *ntc) { + NonTriviallyCopyable *ntc, + Incomplete* i) { // OK memset(tc, 0, sizeof(*tc)); + // OK + memset(i, 0, 10); + // expected-warning@+2{{first argument in call to 'memset' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memset(ntc, 0, sizeof(*ntc)); @@ -36,10 +45,14 @@ void test_memset(TriviallyCopyable* tc, void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, - NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1, + Incomplete *i0, Incomplete *i1) { // OK memcpy(tc0, tc1, sizeof(*tc0)); + // OK + memcpy(i0, i1, 10); + // expected-warning@+2{{first argument in call to 'memcpy' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memcpy(ntc0, ntc1, sizeof(*ntc0)); @@ -52,10 +65,14 @@ void test_memcpy(TriviallyCopyable* tc0, TriviallyCopyable* tc1, } void test_memmove(TriviallyCopyable* tc0, TriviallyCopyable* tc1, - NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1) { + NonTriviallyCopyable *ntc0, NonTriviallyCopyable *ntc1, + Incomplete *i0, Incomplete *i1) { // OK memmove(tc0, tc1, sizeof(*tc0)); + // OK + memmove(i0, i1, 10); + // expected-warning@+2{{first argument in call to 'memmove' is a pointer to non-trivially copyable type 'NonTriviallyCopyable'}} // expected-note@+1{{explicitly cast the pointer to silence this warning}} memmove(ntc0, ntc1, sizeof(*ntc0)); diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp index bfc34b55c1f667..724d444638b57e 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-function-attr.cpp @@ -111,6 +111,37 @@ int testFoldExpression(Vs&&... v) { return (... + v); // expected-warning{{function introduces unsafe buffer manipulation}} } +struct HoldsUnsafeMembers { + HoldsUnsafeMembers() + : FromCtor(3), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{3} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + [[clang::unsafe_buffer_usage]] + HoldsUnsafeMembers(int i) + : FromCtor(i), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{i} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + HoldsUnsafeMembers(float f) + : HoldsUnsafeMembers(0) {} // expected-warning{{function introduces unsafe buffer manipulation}} + + UnsafeMembers FromCtor; + UnsafeMembers FromCtor2; + UnsafeMembers FromField{3}; // expected-warning 2{{function introduces unsafe buffer manipulation}} +}; + +struct SubclassUnsafeMembers : public UnsafeMembers { + SubclassUnsafeMembers() + : UnsafeMembers(3) // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + [[clang::unsafe_buffer_usage]] + SubclassUnsafeMembers(int i) + : UnsafeMembers(i) // expected-warning{{function introduces unsafe buffer manipulation}} + {} +}; + // https://github.com/llvm/llvm-project/issues/80482 void testClassMembers() { UnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} @@ -122,4 +153,95 @@ void testClassMembers() { UnsafeMembers()(); // expected-warning{{function introduces unsafe buffer manipulation}} testFoldExpression(UnsafeMembers(), UnsafeMembers()); + + HoldsUnsafeMembers(); + HoldsUnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} + + SubclassUnsafeMembers(); + SubclassUnsafeMembers(3); // expected-warning{{function introduces unsafe buffer manipulation}} +} + +// Not an aggregate, so its constructor is not implicit code and will be +// visited/checked for warnings. +struct NotCalledHoldsUnsafeMembers { + NotCalledHoldsUnsafeMembers() + : FromCtor(3), // expected-warning{{function introduces unsafe buffer manipulation}} + FromCtor2{3} // expected-warning{{function introduces unsafe buffer manipulation}} + {} + + UnsafeMembers FromCtor; + UnsafeMembers FromCtor2; + UnsafeMembers FromField{3}; // expected-warning{{function introduces unsafe buffer manipulation}} +}; + +// An aggregate, so its constructor is implicit code. Since it's not called, it +// is never generated. +struct AggregateUnused { + UnsafeMembers f1; + // While this field would trigger the warning during initialization, since + // it's unused, there's no code generated that does the initialization, so + // no warning. + UnsafeMembers f2{3}; +}; + +struct AggregateExplicitlyInitializedSafe { + UnsafeMembers f1; + // The warning is not fired as the field is always explicltly initialized + // elsewhere. This initializer is never used. + UnsafeMembers f2{3}; +}; + +void testAggregateExplicitlyInitializedSafe() { + AggregateExplicitlyInitializedSafe A{ + .f2 = UnsafeMembers(), // A safe constructor. + }; } + +struct AggregateExplicitlyInitializedUnsafe { + UnsafeMembers f1; + // The warning is not fired as the field is always explicltly initialized + // elsewhere. This initializer is never used. + UnsafeMembers f2{3}; +}; + +void testAggregateExplicitlyInitializedUnsafe() { + AggregateExplicitlyInitializedUnsafe A{ + .f2 = UnsafeMembers(3), // expected-warning{{function introduces unsafe buffer manipulation}} + }; +} + +struct AggregateViaAggregateInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaAggregateInit(). + UnsafeMembers f2{3}; +}; + +void testAggregateViaAggregateInit() { + AggregateViaAggregateInit A{}; +}; + +struct AggregateViaValueInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaValueInit(). + UnsafeMembers f2{3}; +}; + +void testAggregateViaValueInit() { + auto A = AggregateViaValueInit(); +}; + +struct AggregateViaDefaultInit { + UnsafeMembers f1; + // FIXME: A construction of this class does initialize the field through + // this initializer, so it should warn. Ideally it should also point to + // where the site of the construction is in testAggregateViaValueInit(). + UnsafeMembers f2{3}; +}; + +void testAggregateViaDefaultInit() { + AggregateViaDefaultInit A; +}; diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp index e97511593bbd81..c138fe088b3ba9 100644 --- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-in-container-span-construct.cpp @@ -157,3 +157,23 @@ namespace test_flag { } } //namespace test_flag + +struct HoldsStdSpanAndInitializedInCtor { + char* Ptr; + unsigned Size; + std::span Span{Ptr, Size}; // no-warning (this code is unreachable) + + HoldsStdSpanAndInitializedInCtor(char* P, unsigned S) + : Span(P, S) // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} + {} +}; + +struct HoldsStdSpanAndNotInitializedInCtor { + char* Ptr; + unsigned Size; + std::span Span{Ptr, Size}; // expected-warning{{the two-parameter std::span construction is unsafe as it can introduce mismatch between buffer size and the bound information}} + + HoldsStdSpanAndNotInitializedInCtor(char* P, unsigned S) + : Ptr(P), Size(S) + {} +}; diff --git a/clang/tools/clang-format/git-clang-format.bat b/clang/tools/clang-format/git-clang-format.bat index 19c82d8a04132b..a40276e63c5848 100644 --- a/clang/tools/clang-format/git-clang-format.bat +++ b/clang/tools/clang-format/git-clang-format.bat @@ -1 +1 @@ -py -3 "%~pn0" %* +py -3 "%~dpn0" %* diff --git a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp index 9fcecaee318a79..561b73c73ad7df 100644 --- a/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +++ b/clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp @@ -140,9 +140,6 @@ static std::list> TempFiles; /// Codegen flags for LTO backend. static codegen::RegisterCodeGenFlags CodeGenFlags; -/// Global flag to indicate that the LTO pipeline threw an error. -static std::atomic LTOError; - using OffloadingImage = OffloadBinary::OffloadingImage; namespace llvm { @@ -293,12 +290,10 @@ Expected findProgram(StringRef Name, ArrayRef Paths) { return *Path; } -/// We will defer LTO to the target's linker if we are not doing JIT and it is -/// supported by the toolchain. bool linkerSupportsLTO(const ArgList &Args) { llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); return Triple.isNVPTX() || Triple.isAMDGPU() || - Args.getLastArgValue(OPT_linker_path_EQ).ends_with("ld.lld"); + Args.getLastArgValue(OPT_linker_path_EQ).ends_with("lld"); } /// Returns the hashed value for a constant string. @@ -528,13 +523,11 @@ Expected clang(ArrayRef InputFiles, const ArgList &Args) { }; // Forward all of the `--offload-opt` and similar options to the device. - if (linkerSupportsLTO(Args)) { - CmdArgs.push_back("-flto"); - for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) - CmdArgs.append( - {"-Xlinker", - Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); - } + CmdArgs.push_back("-flto"); + for (auto &Arg : Args.filtered(OPT_offload_opt_eq_minus, OPT_mllvm)) + CmdArgs.append( + {"-Xlinker", + Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))}); if (!Triple.isNVPTX()) CmdArgs.push_back("-Wl,--no-undefined"); @@ -646,357 +639,6 @@ Expected linkDevice(ArrayRef InputFiles, } } -void diagnosticHandler(const DiagnosticInfo &DI) { - std::string ErrStorage; - raw_string_ostream OS(ErrStorage); - DiagnosticPrinterRawOStream DP(OS); - DI.print(DP); - - switch (DI.getSeverity()) { - case DS_Error: - WithColor::error(errs(), LinkerExecutable) << ErrStorage << "\n"; - LTOError = true; - break; - case DS_Warning: - WithColor::warning(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Note: - WithColor::note(errs(), LinkerExecutable) << ErrStorage << "\n"; - break; - case DS_Remark: - WithColor::remark(errs()) << ErrStorage << "\n"; - break; - } -} - -// Get the list of target features from the input file and unify them such that -// if there are multiple +xxx or -xxx features we only keep the last one. -std::vector getTargetFeatures(ArrayRef InputFiles) { - SmallVector Features; - for (const OffloadFile &File : InputFiles) { - for (auto Arg : llvm::split(File.getBinary()->getString("feature"), ",")) - Features.emplace_back(Arg); - } - - // Only add a feature if it hasn't been seen before starting from the end. - std::vector UnifiedFeatures; - DenseSet UsedFeatures; - for (StringRef Feature : llvm::reverse(Features)) { - if (UsedFeatures.insert(Feature.drop_front()).second) - UnifiedFeatures.push_back(Feature.str()); - } - - return UnifiedFeatures; -} - -template > -std::unique_ptr createLTO( - const ArgList &Args, const std::vector &Features, - ModuleHook Hook = [](size_t, const Module &) { return true; }) { - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - // We need to remove AMD's target-id from the processor if present. - StringRef TargetID = Args.getLastArgValue(OPT_arch_EQ); - StringRef Arch = clang::getProcessorFromTargetID(Triple, TargetID); - lto::Config Conf; - lto::ThinBackend Backend; - // TODO: Handle index-only thin-LTO - Backend = - lto::createInProcessThinBackend(llvm::heavyweight_hardware_concurrency()); - - Conf.CPU = Arch.str(); - Conf.Options = codegen::InitTargetOptionsFromCodeGenFlags(Triple); - - Conf.RemarksFilename = RemarksFilename; - Conf.RemarksPasses = RemarksPasses; - Conf.RemarksWithHotness = RemarksWithHotness; - Conf.RemarksHotnessThreshold = RemarksHotnessThreshold; - Conf.RemarksFormat = RemarksFormat; - - StringRef OptLevel = Args.getLastArgValue(OPT_opt_level, "O2"); - Conf.MAttrs = Features; - std::optional CGOptLevelOrNone = - CodeGenOpt::parseLevel(OptLevel[1]); - assert(CGOptLevelOrNone && "Invalid optimization level"); - Conf.CGOptLevel = *CGOptLevelOrNone; - Conf.OptLevel = OptLevel[1] - '0'; - Conf.DefaultTriple = Triple.getTriple(); - - // TODO: Should we complain about combining --opt-level and -passes, as opt - // does? That might be too limiting in clang-linker-wrapper, so for now we - // just warn in the help entry for -passes that the default corresponding - // to --opt-level=O? should be included there. The problem is that - // --opt-level produces effects in clang-linker-wrapper beyond what -passes - // appears to be able to achieve, so rejecting the combination of --opt-level - // and -passes would apparently make it impossible to combine those effects - // with a custom pass pipeline. - Conf.OptPipeline = PassPipeline; - Conf.PassPlugins = PassPlugins; - - LTOError = false; - Conf.DiagHandler = diagnosticHandler; - - Conf.PTO.LoopVectorization = Conf.OptLevel > 1; - Conf.PTO.SLPVectorization = Conf.OptLevel > 1; - - if (SaveTemps) { - std::string TempName = (sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + TargetID) - .str(); - Conf.PostInternalizeModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postlink.bc" - : TempName + "." + std::to_string(Task) + ".postlink.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - Conf.PreCodeGenModuleHook = [=](size_t Task, const Module &M) { - std::string File = - !Task ? TempName + ".postopt.bc" - : TempName + "." + std::to_string(Task) + ".postopt.bc"; - error_code EC; - raw_fd_ostream LinkedBitcode(File, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - return true; - }; - } - Conf.PostOptModuleHook = Hook; - Conf.CGFileType = (Triple.isNVPTX() || SaveTemps) - ? CodeGenFileType::AssemblyFile - : CodeGenFileType::ObjectFile; - - // TODO: Handle remark files - Conf.HasWholeProgramVisibility = Args.hasArg(OPT_whole_program); - - return std::make_unique(std::move(Conf), Backend); -} - -// Returns true if \p S is valid as a C language identifier and will be given -// `__start_` and `__stop_` symbols. -bool isValidCIdentifier(StringRef S) { - return !S.empty() && (isAlpha(S[0]) || S[0] == '_') && - llvm::all_of(llvm::drop_begin(S), - [](char C) { return C == '_' || isAlnum(C); }); -} - -Error linkBitcodeFiles(SmallVectorImpl &InputFiles, - SmallVectorImpl &OutputFiles, - const ArgList &Args) { - llvm::TimeTraceScope TimeScope("Link bitcode files"); - const llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ)); - StringRef Arch = Args.getLastArgValue(OPT_arch_EQ); - - SmallVector BitcodeInputFiles; - DenseSet StrongResolutions; - DenseSet UsedInRegularObj; - DenseSet UsedInSharedLib; - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - - // Search for bitcode files in the input and create an LTO input file. If - // it is not a bitcode file, scan its symbol table for symbols we need to - // save. - for (OffloadFile &File : InputFiles) { - MemoryBufferRef Buffer = MemoryBufferRef(File.getBinary()->getImage(), ""); - - file_magic Type = identify_magic(Buffer.getBuffer()); - switch (Type) { - case file_magic::bitcode: { - Expected IRSymtabOrErr = readIRSymtab(Buffer); - if (!IRSymtabOrErr) - return IRSymtabOrErr.takeError(); - - // Check for any strong resolutions we need to preserve. - for (unsigned I = 0; I != IRSymtabOrErr->Mods.size(); ++I) { - for (const auto &Sym : IRSymtabOrErr->TheReader.module_symbols(I)) { - if (!Sym.isFormatSpecific() && Sym.isGlobal() && !Sym.isWeak() && - !Sym.isUndefined()) - StrongResolutions.insert(Saver.save(Sym.Name)); - } - } - BitcodeInputFiles.emplace_back(std::move(File)); - continue; - } - case file_magic::elf_relocatable: - case file_magic::elf_shared_object: { - Expected> ObjFile = - ObjectFile::createObjectFile(Buffer); - if (!ObjFile) - continue; - - for (SymbolRef Sym : (*ObjFile)->symbols()) { - Expected Name = Sym.getName(); - if (!Name) - return Name.takeError(); - - // Record if we've seen these symbols in any object or shared - // libraries. - if ((*ObjFile)->isRelocatableObject()) - UsedInRegularObj.insert(Saver.save(*Name)); - else - UsedInSharedLib.insert(Saver.save(*Name)); - } - continue; - } - default: - continue; - } - } - - if (BitcodeInputFiles.empty()) - return Error::success(); - - // Remove all the bitcode files that we moved from the original input. - llvm::erase_if(InputFiles, [](OffloadFile &F) { return !F.getBinary(); }); - - // LTO Module hook to output bitcode without running the backend. - SmallVector BitcodeOutput; - auto OutputBitcode = [&](size_t, const Module &M) { - auto TempFileOrErr = createOutputFile(sys::path::filename(ExecutableName) + - "-jit-" + Triple.getTriple(), - "bc"); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - - std::error_code EC; - raw_fd_ostream LinkedBitcode(*TempFileOrErr, EC, sys::fs::OF_None); - if (EC) - reportError(errorCodeToError(EC)); - WriteBitcodeToFile(M, LinkedBitcode); - BitcodeOutput.push_back(*TempFileOrErr); - return false; - }; - - // We assume visibility of the whole program if every input file was - // bitcode. - auto Features = getTargetFeatures(BitcodeInputFiles); - auto LTOBackend = Args.hasArg(OPT_embed_bitcode) || - Args.hasArg(OPT_builtin_bitcode_EQ) || - Args.hasArg(OPT_clang_backend) - ? createLTO(Args, Features, OutputBitcode) - : createLTO(Args, Features); - - // We need to resolve the symbols so the LTO backend knows which symbols - // need to be kept or can be internalized. This is a simplified symbol - // resolution scheme to approximate the full resolution a linker would do. - uint64_t Idx = 0; - DenseSet PrevailingSymbols; - for (auto &BitcodeInput : BitcodeInputFiles) { - // Get a semi-unique buffer identifier for Thin-LTO. - StringRef Identifier = Saver.save( - std::to_string(Idx++) + "." + - BitcodeInput.getBinary()->getMemoryBufferRef().getBufferIdentifier()); - MemoryBufferRef Buffer = - MemoryBufferRef(BitcodeInput.getBinary()->getImage(), Identifier); - Expected> BitcodeFileOrErr = - llvm::lto::InputFile::create(Buffer); - if (!BitcodeFileOrErr) - return BitcodeFileOrErr.takeError(); - - // Save the input file and the buffer associated with its memory. - const auto Symbols = (*BitcodeFileOrErr)->symbols(); - SmallVector Resolutions(Symbols.size()); - size_t Idx = 0; - for (auto &Sym : Symbols) { - lto::SymbolResolution &Res = Resolutions[Idx++]; - - // We will use this as the prevailing symbol definition in LTO unless - // it is undefined or another definition has already been used. - Res.Prevailing = - !Sym.isUndefined() && - !(Sym.isWeak() && StrongResolutions.contains(Sym.getName())) && - PrevailingSymbols.insert(Saver.save(Sym.getName())).second; - - // We need LTO to preseve the following global symbols: - // 1) Symbols used in regular objects. - // 2) Sections that will be given a __start/__stop symbol. - // 3) Prevailing symbols that are needed visible to external - // libraries. - Res.VisibleToRegularObj = - UsedInRegularObj.contains(Sym.getName()) || - isValidCIdentifier(Sym.getSectionName()) || - (Res.Prevailing && - (Sym.getVisibility() != GlobalValue::HiddenVisibility && - !Sym.canBeOmittedFromSymbolTable())); - - // Identify symbols that must be exported dynamically and can be - // referenced by other files. - Res.ExportDynamic = - Sym.getVisibility() != GlobalValue::HiddenVisibility && - (UsedInSharedLib.contains(Sym.getName()) || - !Sym.canBeOmittedFromSymbolTable()); - - // The final definition will reside in this linkage unit if the symbol - // is defined and local to the module. This only checks for bitcode - // files, full assertion will require complete symbol resolution. - Res.FinalDefinitionInLinkageUnit = - Sym.getVisibility() != GlobalValue::DefaultVisibility && - (!Sym.isUndefined() && !Sym.isCommon()); - - // We do not support linker redefined symbols (e.g. --wrap) for device - // image linking, so the symbols will not be changed after LTO. - Res.LinkerRedefined = false; - } - - // Add the bitcode file with its resolved symbols to the LTO job. - if (Error Err = LTOBackend->add(std::move(*BitcodeFileOrErr), Resolutions)) - return Err; - } - - // Run the LTO job to compile the bitcode. - size_t MaxTasks = LTOBackend->getMaxTasks(); - SmallVector Files(MaxTasks); - auto AddStream = - [&](size_t Task, - const Twine &ModuleName) -> std::unique_ptr { - int FD = -1; - auto &TempFile = Files[Task]; - StringRef Extension = (Triple.isNVPTX() || SaveTemps) ? "s" : "o"; - std::string TaskStr = Task ? "." + std::to_string(Task) : ""; - auto TempFileOrErr = - createOutputFile(sys::path::filename(ExecutableName) + "." + - Triple.getTriple() + "." + Arch + TaskStr, - Extension); - if (!TempFileOrErr) - reportError(TempFileOrErr.takeError()); - TempFile = *TempFileOrErr; - if (std::error_code EC = sys::fs::openFileForWrite(TempFile, FD)) - reportError(errorCodeToError(EC)); - return std::make_unique( - std::make_unique(FD, true)); - }; - - if (Error Err = LTOBackend->run(AddStream)) - return Err; - - if (LTOError) - return createStringError("Errors encountered inside the LTO pipeline."); - - // If we are embedding bitcode we only need the intermediate output. - bool SingleOutput = Files.size() == 1; - if (Args.hasArg(OPT_embed_bitcode)) { - if (BitcodeOutput.size() != 1 || !SingleOutput) - return createStringError("Cannot embed bitcode with multiple files."); - OutputFiles.push_back(Args.MakeArgString(BitcodeOutput.front())); - return Error::success(); - } - - // Append the new inputs to the device linker input. If the user requested - // an internalizing link we need to pass the bitcode to clang. - for (StringRef File : - Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ) - ? BitcodeOutput - : Files) - OutputFiles.push_back(File); - - return Error::success(); -} - Expected writeOffloadFile(const OffloadFile &File) { const OffloadBinary &Binary = *File.getBinary(); @@ -1327,15 +969,8 @@ Expected> linkAndWrapDeviceFiles( if (File.getBinary()->getOffloadKind() != OFK_None) ActiveOffloadKinds.insert(File.getBinary()->getOffloadKind()); - // First link and remove all the input files containing bitcode if - // the target linker does not support it natively. + // Write any remaining device inputs to an output file. SmallVector InputFiles; - if (!linkerSupportsLTO(LinkerArgs)) - if (Error Err = linkBitcodeFiles(Input, InputFiles, LinkerArgs)) - return Err; - - // Write any remaining device inputs to an output file for the - // linker. for (const OffloadFile &File : Input) { auto FileNameOrErr = writeOffloadFile(File); if (!FileNameOrErr) @@ -1344,10 +979,7 @@ Expected> linkAndWrapDeviceFiles( } // Link the remaining device files using the device linker. - auto OutputOrErr = - !Args.hasArg(OPT_embed_bitcode) || linkerSupportsLTO(LinkerArgs) - ? linkDevice(InputFiles, LinkerArgs) - : InputFiles.front(); + auto OutputOrErr = linkDevice(InputFiles, LinkerArgs); if (!OutputOrErr) return OutputOrErr.takeError(); diff --git a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp index b9767a7a03d0b5..bc191afdca739d 100644 --- a/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp +++ b/clang/tools/clang-nvlink-wrapper/ClangNVLinkWrapper.cpp @@ -250,6 +250,7 @@ struct Symbol { }; Symbol() : File(), Flags(None), UsedInRegularObj(false) {} + Symbol(Symbol::Flags Flags) : File(), Flags(Flags), UsedInRegularObj(true) {} Symbol(MemoryBufferRef File, const irsymtab::Reader::SymbolRef Sym) : File(File), Flags(0), UsedInRegularObj(false) { @@ -535,6 +536,8 @@ Expected> getInput(const ArgList &Args) { bool Extracted = true; StringMap SymTab; + for (auto &Sym : Args.getAllArgValues(OPT_u)) + SymTab[Sym] = Symbol(Symbol::Undefined); SmallVector> LinkerInput; while (Extracted) { Extracted = false; diff --git a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td index a80c5937b42992..6de1a25c14f8be 100644 --- a/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td +++ b/clang/tools/clang-nvlink-wrapper/NVLinkOpts.td @@ -43,11 +43,11 @@ def plugin : JoinedOrSeparate<["--", "-"], "plugin">, Flags<[HelpHidden, WrapperOnlyOption]>; def arch : Separate<["--", "-"], "arch">, - HelpText<"Specify the 'sm_' name of the target architecture.">; + HelpText<"Specify the 'sm_' name of the target architecture">; def : Joined<["--", "-"], "plugin-opt=mcpu=">, Flags<[HelpHidden, WrapperOnlyOption]>, Alias; -def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile.">; +def g : Flag<["-"], "g">, HelpText<"Specify that this was a debug compile">; def debug : Flag<["--"], "debug">, Alias; def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>, @@ -55,6 +55,9 @@ def lto_emit_llvm : Flag<["--"], "lto-emit-llvm">, Flags<[WrapperOnlyOption]>, def lto_emit_asm : Flag<["--"], "lto-emit-asm">, Flags<[WrapperOnlyOption]>, HelpText<"Emit assembly code">; +def u : JoinedOrSeparate<["-"], "u">, HelpText<"Force undefined symbol during linking">; +def undefined : JoinedOrSeparate<["--"], "undefined">, Alias; + def O : Joined<["--", "-"], "plugin-opt=O">, Flags<[WrapperOnlyOption]>, MetaVarName<"">, HelpText<"Optimization level for LTO">; diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp index d696375547acce..056b7c7b571ef4 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp @@ -3342,6 +3342,45 @@ TEST_P(ASTMatchersTest, declStmt(isInTemplateInstantiation()))); } +TEST_P(ASTMatchersTest, IsInstantiated_MatchesVariableInstantiation) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(matches("template int V = 10; void x() { V; }", + varDecl(isInstantiated()))); +} + +TEST_P(ASTMatchersTest, IsInstantiated_NotMatchesVariableDefinition) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(notMatches("template int V = 10;", + varDecl(isInstantiated()))); +} + +TEST_P(ASTMatchersTest, + IsInTemplateInstantiation_MatchesVariableInstantiationStmt) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(matches( + "template auto V = []() { T i; }; void x() { V(); }", + declStmt(isInTemplateInstantiation()))); +} + +TEST_P(ASTMatchersTest, + IsInTemplateInstantiation_NotMatchesVariableDefinitionStmt) { + if (!GetParam().isCXX14OrLater()) { + return; + } + + EXPECT_TRUE(notMatches("template auto V = []() { T i; };", + declStmt(isInTemplateInstantiation()))); +} + TEST_P(ASTMatchersTest, IsInTemplateInstantiation_Sharing) { if (!GetParam().isCXX()) { return; diff --git a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp index 20231ee1502c12..57a1fc06b26b16 100644 --- a/clang/utils/TableGen/ClangBuiltinsEmitter.cpp +++ b/clang/utils/TableGen/ClangBuiltinsEmitter.cpp @@ -64,7 +64,8 @@ class PrototypeParser { // detecting the comma of the template class as a separator for // the parameters of the prototype. Note: the assumption is that // we cannot have nested _ExtVector. - if (Current.starts_with("_ExtVector<")) { + if (Current.starts_with("_ExtVector<") || + Current.starts_with("_Vector<")) { const size_t EndTemplate = Current.find('>', 0); ParseType(Current.substr(0, EndTemplate + 1)); // Move the prototype beyond _ExtVector<...> @@ -123,7 +124,8 @@ class PrototypeParser { if (Substitution.empty()) PrintFatalError(Loc, "Not a template"); ParseType(Substitution); - } else if (T.consume_front("_ExtVector")) { + } else if (auto IsExt = T.consume_front("_ExtVector"); + IsExt || T.consume_front("_Vector")) { // Clang extended vector types are mangled as follows: // // '_ExtVector<' ',' '>' @@ -135,7 +137,7 @@ class PrototypeParser { unsigned long long Lanes; if (consumeUnsignedInteger(T, 10, Lanes)) PrintFatalError(Loc, "Expected number of lanes after '_ExtVector<'"); - Type += "E" + std::to_string(Lanes); + Type += (IsExt ? "E" : "V") + std::to_string(Lanes); if (!T.consume_front(",")) PrintFatalError(Loc, "Expected ',' after number of lanes in '_ExtVector<'"); diff --git a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc index 902fa8f79ab816..e454524c9cb6a2 100644 --- a/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc +++ b/compiler-rt/lib/builtins/cpu_model/AArch64CPUFeatures.inc @@ -53,9 +53,9 @@ enum CPUFeatures { FEAT_EBF16, FEAT_RPRES, FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, + RESERVED_FEAT_SVE_BF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_I8MM, // previously used and now ABI legacy FEAT_SVE_F32MM, FEAT_SVE_F64MM, FEAT_SVE2, diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc index 0c76a4fe9b9f2f..4e25feb2e90c63 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -65,14 +65,10 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_I8MM); if (hwcap2 & HWCAP2_EBF16) setCPUFeature(FEAT_EBF16); - if (hwcap2 & HWCAP2_SVE_EBF16) - setCPUFeature(FEAT_SVE_EBF16); if (hwcap2 & HWCAP2_DGH) setCPUFeature(FEAT_DGH); if (hwcap2 & HWCAP2_FRINT) setCPUFeature(FEAT_FRINTTS); - if (hwcap2 & HWCAP2_SVEI8MM) - setCPUFeature(FEAT_SVE_I8MM); if (hwcap2 & HWCAP2_SVEF32MM) setCPUFeature(FEAT_SVE_F32MM); if (hwcap2 & HWCAP2_SVEF64MM) @@ -119,8 +115,6 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_RCPC3); if (hwcap2 & HWCAP2_BF16) setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); if (hwcap & HWCAP_SVE) setCPUFeature(FEAT_SVE); if (hwcap2 & HWCAP2_SVE2) diff --git a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c index 8fa07861371d56..e02ab5b28ce046 100644 --- a/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c +++ b/compiler-rt/test/hwasan/TestCases/many-threads-uaf.c @@ -23,6 +23,7 @@ void *BoringThread(void *arg) { void *UAFThread(void *arg) { char * volatile x = (char*)malloc(10); fprintf(stderr, "ZZZ %p\n", x); + fflush(stderr); free(x); x[5] = 42; // CHECK: ERROR: HWAddressSanitizer: tag-mismatch on address diff --git a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c index 78bef538af1161..da1cb686969206 100644 --- a/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c +++ b/compiler-rt/test/hwasan/TestCases/mem-intrinsics.c @@ -21,6 +21,7 @@ int main() { memcpy(Q, P, 32); #endif write(STDOUT_FILENO, "recovered\n", 10); + fflush(stdout); // WRITE: ERROR: HWAddressSanitizer: tag-mismatch on address // WRITE: WRITE of size 32 at {{.*}} tags: [[PTR_TAG:..]]/[[MEM_TAG:..]] (ptr/mem) // WRITE: Invalid access starting at offset 16 diff --git a/compiler-rt/test/hwasan/TestCases/use-after-free.c b/compiler-rt/test/hwasan/TestCases/use-after-free.c index 070622f560a225..fe4f8b32ea1006 100644 --- a/compiler-rt/test/hwasan/TestCases/use-after-free.c +++ b/compiler-rt/test/hwasan/TestCases/use-after-free.c @@ -15,6 +15,7 @@ int main() { free(x); __hwasan_disable_allocator_tagging(); fprintf(stderr, ISREAD ? "Going to do a READ\n" : "Going to do a WRITE\n"); + fflush(stderr); // CHECK: Going to do a [[TYPE:[A-Z]*]] int r = 0; if (ISREAD) r = x[5]; else x[5] = 42; // should be on the same line. @@ -31,11 +32,11 @@ int main() { // // CHECK: freed by thread {{.*}} here: // CHECK: #0 {{.*}} in {{.*}}free{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-19]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-20]] // CHECK: previously allocated by thread {{.*}} here: // CHECK: #0 {{.*}} in {{.*}}malloc{{.*}} {{.*}}hwasan_allocation_functions.cpp - // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-24]] + // CHECK: #1 {{.*}} in main {{.*}}use-after-free.c:[[@LINE-25]] // CHECK: Memory tags around the buggy address (one tag corresponds to 16 bytes): // CHECK: =>{{.*}}[[MEM_TAG]] // CHECK: SUMMARY: HWAddressSanitizer: tag-mismatch {{.*}} in main diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 868a8b4e287424..f5fb272b4cc3ed 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -289,6 +289,7 @@ struct IntrinsicLibrary { template mlir::Value genIeeeQuietCompare(mlir::Type resultType, llvm::ArrayRef); + mlir::Value genIeeeReal(mlir::Type, llvm::ArrayRef); mlir::Value genIeeeRint(mlir::Type, llvm::ArrayRef); template void genIeeeSetFlagOrHaltingMode(llvm::ArrayRef); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 31ad1b7c6ce5b5..67f7e1aac40edb 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -517,6 +517,7 @@ class ParseTreeDumper { NODE_ENUM(OmpTaskDependenceType, Type) NODE(parser, OmpDependSinkVec) NODE(parser, OmpDependSinkVecLength) + NODE(parser, OmpDestroyClause) NODE(parser, OmpEndAllocators) NODE(parser, OmpEndAtomic) NODE(parser, OmpEndBlockDirective) @@ -571,6 +572,7 @@ class ParseTreeDumper { NODE_ENUM(OmpDeviceClause, DeviceModifier) NODE(parser, OmpDeviceTypeClause) NODE_ENUM(OmpDeviceTypeClause, Type) + NODE(parser, OmpUpdateClause) NODE(parser, OmpScheduleModifier) NODE(OmpScheduleModifier, Modifier1) NODE(OmpScheduleModifier, Modifier2) @@ -609,6 +611,7 @@ class ParseTreeDumper { NODE(parser, OmpAtomicClauseList) NODE(parser, OmpAtomicDefaultMemOrderClause) NODE_ENUM(common, OmpAtomicDefaultMemOrderType) + NODE(parser, OpenMPDepobjConstruct) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPExecutableAllocate) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 174f4c631e9d4c..13c3353512208b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3447,7 +3447,7 @@ WRAPPER_CLASS(OmpObjectList, std::list); // MUTEXINOUTSET | DEPOBJ | // since 5.0 // INOUTSET // since 5.2 struct OmpTaskDependenceType { - ENUM_CLASS(Type, In, Out, Inout, Source, Sink) + ENUM_CLASS(Type, In, Out, Inout, Source, Sink, Depobj) WRAPPER_CLASS_BOILERPLATE(OmpTaskDependenceType, Type); }; @@ -3527,19 +3527,6 @@ struct OmpDefaultmapClause { std::tuple> t; }; -// device([ device-modifier :] scalar-integer-expression) -struct OmpDeviceClause { - TUPLE_CLASS_BOILERPLATE(OmpDeviceClause); - ENUM_CLASS(DeviceModifier, Ancestor, Device_Num) - std::tuple, ScalarIntExpr> t; -}; - -// device_type(any | host | nohost) -struct OmpDeviceTypeClause { - ENUM_CLASS(Type, Any, Host, Nohost) - WRAPPER_CLASS_BOILERPLATE(OmpDeviceTypeClause, Type); -}; - // 2.13.9 depend-vec-length -> +/- non-negative-constant struct OmpDependSinkVecLength { TUPLE_CLASS_BOILERPLATE(OmpDependSinkVecLength); @@ -3561,6 +3548,8 @@ struct OmpDependSinkVec { // // depend-modifier -> iterator-modifier // since 5.0 struct OmpDependClause { + OmpTaskDependenceType::Type GetDepType() const; + UNION_CLASS_BOILERPLATE(OmpDependClause); EMPTY_CLASS(Source); WRAPPER_CLASS(Sink, std::list); @@ -3573,6 +3562,26 @@ struct OmpDependClause { std::variant u; }; +// Ref: [5.0:254-255], [5.1:287-288], [5.2:73] +// +// destroy-clause -> +// DESTROY | // since 5.0, until 5.2 +// DESTROY(variable) // since 5.2 +WRAPPER_CLASS(OmpDestroyClause, OmpObject); + +// device([ device-modifier :] scalar-integer-expression) +struct OmpDeviceClause { + TUPLE_CLASS_BOILERPLATE(OmpDeviceClause); + ENUM_CLASS(DeviceModifier, Ancestor, Device_Num) + std::tuple, ScalarIntExpr> t; +}; + +// device_type(any | host | nohost) +struct OmpDeviceTypeClause { + ENUM_CLASS(Type, Any, Host, Nohost) + WRAPPER_CLASS_BOILERPLATE(OmpDeviceTypeClause, Type); +}; + // OMP 5.2 12.6.1 grainsize-clause -> grainsize ([prescriptiveness :] value) struct OmpGrainsizeClause { TUPLE_CLASS_BOILERPLATE(OmpGrainsizeClause); @@ -3716,6 +3725,11 @@ struct OmpNumTasksClause { std::tuple, ScalarIntExpr> t; }; +// Ref: [5.0:254-255], [5.1:287-288], [5.2:321-322] +// +// update-clause -> UPDATE(task-dependence-type) // since 5.0 +WRAPPER_CLASS(OmpUpdateClause, OmpTaskDependenceType); + // OpenMP Clauses struct OmpClause { UNION_CLASS_BOILERPLATE(OmpClause); @@ -4023,6 +4037,18 @@ struct OpenMPCancelConstruct { std::tuple> t; }; +// Ref: [5.0:254-255], [5.1:287-288], [5.2:322-323] +// +// depobj-construct -> DEPOBJ(depend-object) depobj-clause // since 5.0 +// depobj-clause -> depend-clause | // until 5.2 +// destroy-clause | +// update-clause +struct OpenMPDepobjConstruct { + TUPLE_CLASS_BOILERPLATE(OpenMPDepobjConstruct); + CharBlock source; + std::tuple t; +}; + // 2.17.8 flush -> FLUSH [memory-order-clause] [(variable-name-list)] struct OpenMPFlushConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPFlushConstruct); @@ -4047,7 +4073,8 @@ struct OpenMPStandaloneConstruct { UNION_CLASS_BOILERPLATE(OpenMPStandaloneConstruct); CharBlock source; std::variant + OpenMPCancelConstruct, OpenMPCancellationPointConstruct, + OpenMPDepobjConstruct> u; }; diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h index 3c3ae73d4ad7a1..fb48152d707182 100644 --- a/flang/include/flang/Runtime/CUDA/memory.h +++ b/flang/include/flang/Runtime/CUDA/memory.h @@ -36,19 +36,18 @@ void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a pointer to a descriptor. -void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src, +void RTDECL(CUFDataTransferDescPtr)(Descriptor *dst, void *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a pointer. -void RTDECL(CUFDataTransferPtrDesc)(void *dst, const Descriptor &src, +void RTDECL(CUFDataTransferPtrDesc)(void *dst, Descriptor *src, std::size_t bytes, unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); /// Data transfer from a descriptor to a descriptor. -void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dst, - const Descriptor &src, unsigned mode, const char *sourceFile = nullptr, - int sourceLine = 0); +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dst, Descriptor *src, + unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0); } // extern "C" } // namespace Fortran::runtime::cuda diff --git a/flang/include/flang/Semantics/openmp-directive-sets.h b/flang/include/flang/Semantics/openmp-directive-sets.h index 50d6d5b59ef7dd..55ef1e0ca61b9f 100644 --- a/flang/include/flang/Semantics/openmp-directive-sets.h +++ b/flang/include/flang/Semantics/openmp-directive-sets.h @@ -210,6 +210,7 @@ static const OmpDirectiveSet blockConstructSet{ Directive::OMPD_ordered, Directive::OMPD_parallel, Directive::OMPD_parallel_masked, + Directive::OMPD_parallel_master, Directive::OMPD_parallel_workshare, Directive::OMPD_scope, Directive::OMPD_single, diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 0767d8ea84bc6b..b9512f33eaacd5 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -755,7 +755,7 @@ class Symbol { OmpDeclarativeAllocateDirective, OmpExecutableAllocateDirective, OmpDeclareSimd, OmpDeclareTarget, OmpThreadprivate, OmpDeclareReduction, OmpFlushed, OmpCriticalLock, OmpIfSpecified, OmpNone, OmpPreDetermined, - OmpImplicit); + OmpImplicit, OmpDependObject); using Flags = common::EnumSet; const Scope &owner() const { return *owner_; } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 7c254ce673855a..8eb1fdb4709178 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -137,6 +137,8 @@ genDependKindAttr(fir::FirOpBuilder &firOpBuilder, case omp::clause::Depend::TaskDependenceType::Mutexinoutset: case omp::clause::Depend::TaskDependenceType::Inoutset: case omp::clause::Depend::TaskDependenceType::Depobj: + case omp::clause::Depend::TaskDependenceType::Sink: + case omp::clause::Depend::TaskDependenceType::Source: llvm_unreachable("unhandled parser task dependence type"); break; } diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 9483f643acd55a..45b89de023a4bf 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -338,6 +338,27 @@ ReductionOperator makeReductionOperator(const parser::OmpReductionOperator &inp, inp.u); } +clause::TaskDependenceType +makeDepType(const parser::OmpTaskDependenceType &inp) { + switch (inp.v) { + case parser::OmpTaskDependenceType::Type::Depobj: + return clause::TaskDependenceType::Depobj; + case parser::OmpTaskDependenceType::Type::In: + return clause::TaskDependenceType::In; + case parser::OmpTaskDependenceType::Type::Inout: + return clause::TaskDependenceType::Inout; + // Inoutset // missing-in-parser + // Mutexinoutset // missing-in-parser + case parser::OmpTaskDependenceType::Type::Out: + return clause::TaskDependenceType::Out; + case parser::OmpTaskDependenceType::Type::Sink: + return clause::TaskDependenceType::Sink; + case parser::OmpTaskDependenceType::Type::Source: + return clause::TaskDependenceType::Source; + } + llvm_unreachable("Unexpected dependence type"); +} + // -------------------------------------------------------------------- // Actual clauses. Each T (where tomp::T exists in ClauseT) has its "make". @@ -554,18 +575,6 @@ Depend make(const parser::OmpClause::Depend &inp, // Iteration is the equivalent of parser::OmpDependSinkVec using Iteration = Doacross::Vector::value_type; // LoopIterationT - CLAUSET_ENUM_CONVERT( // - convert1, parser::OmpTaskDependenceType::Type, Depend::TaskDependenceType, - // clang-format off - MS(In, In) - MS(Out, Out) - MS(Inout, Inout) - // MS(, Mutexinoutset) // missing-in-parser - // MS(, Inputset) // missing-in-parser - // MS(, Depobj) // missing-in-parser - // clang-format on - ); - return Depend{Fortran::common::visit( // common::visitors{ // Doacross @@ -602,7 +611,7 @@ Depend make(const parser::OmpClause::Depend &inp, auto &&maybeIter = maybeApply( [&](auto &&s) { return makeIterator(s, semaCtx); }, t0); - return Depend::DepType{{/*TaskDependenceType=*/convert1(t1.v), + return Depend::DepType{{/*TaskDependenceType=*/makeDepType(t1), /*Iterator=*/std::move(maybeIter), /*LocatorList=*/makeObjects(t2, semaCtx)}}; }, @@ -614,8 +623,14 @@ Depend make(const parser::OmpClause::Depend &inp, Destroy make(const parser::OmpClause::Destroy &inp, semantics::SemanticsContext &semaCtx) { - // inp -> empty - llvm_unreachable("Empty: destroy"); + // inp.v -> std::optional + auto &&maybeObject = maybeApply( + [&](const parser::OmpDestroyClause &c) { + return makeObject(c.v, semaCtx); + }, + inp.v); + + return Destroy{/*DestroyVar=*/std::move(maybeObject)}; } Detach make(const parser::OmpClause::Detach &inp, @@ -1279,8 +1294,8 @@ Uniform make(const parser::OmpClause::Uniform &inp, Update make(const parser::OmpClause::Update &inp, semantics::SemanticsContext &semaCtx) { - // inp -> empty - return Update{/*TaskDependenceType=*/std::nullopt}; + // inp.v -> parser::OmpUpdateClause + return Update{/*TaskDependenceType=*/makeDepType(inp.v.v)}; } Use make(const parser::OmpClause::Use &inp, diff --git a/flang/lib/Lower/OpenMP/Clauses.h b/flang/lib/Lower/OpenMP/Clauses.h index 1e911a20468575..51180ebfe5745e 100644 --- a/flang/lib/Lower/OpenMP/Clauses.h +++ b/flang/lib/Lower/OpenMP/Clauses.h @@ -152,6 +152,7 @@ using IteratorSpecifier = tomp::type::IteratorSpecifierT; using DefinedOperator = tomp::type::DefinedOperatorT; using ProcedureDesignator = tomp::type::ProcedureDesignatorT; using ReductionOperator = tomp::type::ReductionIdentifierT; +using TaskDependenceType = tomp::type::TaskDependenceType; // "Requires" clauses are handled early on, and the aggregated information // is stored in the Symbol details of modules, programs, and subprograms. diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 01a40d6e2204ef..84985b880b1ec2 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -76,6 +76,18 @@ struct EntryBlockArgs { reduction.isValid() && taskReduction.isValid() && useDeviceAddr.isValid() && useDevicePtr.isValid(); } + + auto getSyms() const { + return llvm::concat( + inReduction.syms, map.syms, priv.syms, reduction.syms, + taskReduction.syms, useDeviceAddr.syms, useDevicePtr.syms); + } + + auto getVars() const { + return llvm::concat( + inReduction.vars, map.vars, priv.vars, reduction.vars, + taskReduction.vars, useDeviceAddr.vars, useDevicePtr.vars); + } }; } // namespace @@ -1506,8 +1518,7 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable, genEntryBlock(converter, args, op->getRegion(0)); bindEntryBlockArgs( converter, llvm::cast(op), args); - return llvm::to_vector(llvm::concat( - args.priv.syms, args.reduction.syms)); + return llvm::to_vector(args.getSyms()); }; assert((!enableDelayedPrivatization || dsp) && @@ -1581,11 +1592,11 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, mlir::Operation *terminator = lower::genOpenMPTerminator(builder, sectionsOp, loc); - auto reductionCallback = [&](mlir::Operation *op) { + auto genRegionEntryCB = [&](mlir::Operation *op) { genEntryBlock(converter, args, op->getRegion(0)); bindEntryBlockArgs( converter, llvm::cast(op), args); - return reductionSyms; + return llvm::to_vector(args.getSyms()); }; // Generate nested SECTION constructs. @@ -1611,7 +1622,7 @@ genSectionsOp(lower::AbstractConverter &converter, lower::SymMap &symTable, OpWithBodyGenInfo(converter, symTable, semaCtx, loc, nestedEval, llvm::omp::Directive::OMPD_section) .setClauses(§ionQueue.begin()->clauses) - .setGenRegionEntryCb(reductionCallback), + .setGenRegionEntryCb(genRegionEntryCB), sectionQueue, sectionQueue.begin()); } @@ -2699,6 +2710,21 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); } +static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + const parser::OpenMPDepobjConstruct &construct) { + // These values will be ignored until the construct itself is implemented, + // but run them anyway for the sake of testing (via a Todo test). + auto &ompObj = std::get(construct.t); + const Object &depObj = makeObject(ompObj, semaCtx); + Clause clause = makeClause(std::get(construct.t), semaCtx); + (void)depObj; + (void)clause; + + TODO(converter.getCurrentLocation(), "OpenMPDepobjConstruct"); +} + static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 462193a850c487..7c7c8ee539111d 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -97,7 +97,6 @@ static bool isStaticallyPresent(const fir::ExtendedValue &exv) { /// IEEE module procedure names not yet implemented for genModuleProcTODO. static constexpr char ieee_get_underflow_mode[] = "ieee_get_underflow_mode"; -static constexpr char ieee_real[] = "ieee_real"; static constexpr char ieee_rem[] = "ieee_rem"; static constexpr char ieee_set_underflow_mode[] = "ieee_set_underflow_mode"; @@ -362,7 +361,7 @@ static constexpr IntrinsicHandler handlers[]{ {"ieee_quiet_le", &I::genIeeeQuietCompare}, {"ieee_quiet_lt", &I::genIeeeQuietCompare}, {"ieee_quiet_ne", &I::genIeeeQuietCompare}, - {"ieee_real", &I::genModuleProcTODO}, + {"ieee_real", &I::genIeeeReal}, {"ieee_rem", &I::genModuleProcTODO}, {"ieee_rint", &I::genIeeeRint}, {"ieee_round_eq", &I::genIeeeTypeCompare}, @@ -4799,6 +4798,238 @@ IntrinsicLibrary::genIeeeQuietCompare(mlir::Type resultType, return builder.create(loc, resultType, res); } +// IEEE_REAL +mlir::Value IntrinsicLibrary::genIeeeReal(mlir::Type resultType, + llvm::ArrayRef args) { + // Convert integer or real argument A to a real of a specified kind. + // Round according to the current rounding mode. + // Signal IEEE_INVALID if A is an sNaN, and return a qNaN. + // Signal IEEE_UNDERFLOW for an inexact subnormal or zero result. + // Signal IEEE_OVERFLOW if A is finite and the result is infinite. + // Signal IEEE_INEXACT for an inexact result. + // + // if (type(a) == resultType) { + // // Conversion to the same type is a nop except for sNaN processing. + // result = a + // } else { + // result = r = real(a, kind(result)) + // // Conversion to a larger type is exact. + // if (c_sizeof(a) >= c_sizeof(r)) { + // b = (a is integer) ? int(r, kind(a)) : real(r, kind(a)) + // if (a == b || isNaN(a)) { + // // a is {-0, +0, -inf, +inf, NaN} or exact; result is r + // } else { + // // odd(r) is true if the low bit of significand(r) is 1 + // // rounding mode ieee_other is an alias for mode ieee_nearest + // if (a < b) { + // if (mode == ieee_nearest && odd(r)) result = ieee_next_down(r) + // if (mode == ieee_other && odd(r)) result = ieee_next_down(r) + // if (mode == ieee_to_zero && a > 0) result = ieee_next_down(r) + // if (mode == ieee_away && a < 0) result = ieee_next_down(r) + // if (mode == ieee_down) result = ieee_next_down(r) + // } else { // a > b + // if (mode == ieee_nearest && odd(r)) result = ieee_next_up(r) + // if (mode == ieee_other && odd(r)) result = ieee_next_up(r) + // if (mode == ieee_to_zero && a < 0) result = ieee_next_up(r) + // if (mode == ieee_away && a > 0) result = ieee_next_up(r) + // if (mode == ieee_up) result = ieee_next_up(r) + // } + // } + // } + // } + + assert(args.size() == 2); + mlir::Type i1Ty = builder.getI1Type(); + mlir::Type f32Ty = mlir::FloatType::getF32(builder.getContext()); + mlir::Value a = args[0]; + mlir::Type aType = a.getType(); + + // If the argument is an sNaN, raise an invalid exception and return a qNaN. + // Otherwise return the argument. + auto processSnan = [&](mlir::Value x) { + fir::IfOp ifOp = builder.create(loc, resultType, + genIsFPClass(i1Ty, x, snanTest), + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); + genRaiseExcept(_FORTRAN_RUNTIME_IEEE_INVALID); + builder.create(loc, genQNan(resultType)); + builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); + builder.create(loc, x); + builder.setInsertionPointAfter(ifOp); + return ifOp.getResult(0); + }; + + // Conversion is a nop, except that A may be an sNaN. + if (resultType == aType) + return processSnan(a); + + // Can't directly convert between kind=2 and kind=3. + mlir::Value r, r1; + if ((aType.isBF16() && resultType.isF16()) || + (aType.isF16() && resultType.isBF16())) { + a = builder.createConvert(loc, f32Ty, a); + aType = f32Ty; + } + r = builder.create(loc, resultType, a); + + mlir::IntegerType aIntType = mlir::dyn_cast(aType); + mlir::FloatType aFloatType = mlir::dyn_cast(aType); + mlir::FloatType resultFloatType = mlir::dyn_cast(resultType); + + // Conversion from a smaller type to a larger type is exact. + if ((aIntType ? aIntType.getWidth() : aFloatType.getWidth()) < + resultFloatType.getWidth()) + return aIntType ? r : processSnan(r); + + // A possibly inexact conversion result may need to be rounded up or down. + mlir::Value b = builder.create(loc, aType, r); + mlir::Value aEqB; + if (aIntType) + aEqB = builder.create( + loc, mlir::arith::CmpIPredicate::eq, a, b); + else + aEqB = builder.create( + loc, mlir::arith::CmpFPredicate::UEQ, a, b); + + // [a == b] a is a NaN or r is exact (a may be -0, +0, -inf, +inf) -- return r + fir::IfOp ifOp1 = builder.create(loc, resultType, aEqB, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp1.getThenRegion().front()); + builder.create(loc, aIntType ? r : processSnan(r)); + + // Code common to (a < b) and (a > b) branches. + builder.setInsertionPointToStart(&ifOp1.getElseRegion().front()); + mlir::func::FuncOp getRound = fir::factory::getLlvmGetRounding(builder); + mlir::Value mode = builder.create(loc, getRound).getResult(0); + mlir::Value aIsNegative, aIsPositive; + if (aIntType) { + mlir::Value zero = builder.createIntegerConstant(loc, aIntType, 0); + aIsNegative = builder.create( + loc, mlir::arith::CmpIPredicate::slt, a, zero); + aIsPositive = builder.create( + loc, mlir::arith::CmpIPredicate::sgt, a, zero); + } else { + mlir::Value zero = builder.createRealZeroConstant(loc, aFloatType); + aIsNegative = builder.create( + loc, mlir::arith::CmpFPredicate::OLT, a, zero); + aIsPositive = builder.create( + loc, mlir::arith::CmpFPredicate::OGT, a, zero); + } + mlir::Type resultIntType = builder.getIntegerType(resultFloatType.getWidth()); + mlir::Value resultCast = + builder.create(loc, resultIntType, r); + mlir::Value one = builder.createIntegerConstant(loc, resultIntType, 1); + mlir::Value rIsOdd = builder.create( + loc, i1Ty, builder.create(loc, resultCast, one)); + // Check for a rounding mode match. + auto match = [&](int m) { + return builder.create( + loc, mlir::arith::CmpIPredicate::eq, mode, + builder.createIntegerConstant(loc, mode.getType(), m)); + }; + mlir::Value roundToNearestBit = builder.create( + loc, + // IEEE_OTHER is an alias for IEEE_NEAREST. + match(_FORTRAN_RUNTIME_IEEE_NEAREST), match(_FORTRAN_RUNTIME_IEEE_OTHER)); + mlir::Value roundToNearest = + builder.create(loc, roundToNearestBit, rIsOdd); + mlir::Value roundToZeroBit = match(_FORTRAN_RUNTIME_IEEE_TO_ZERO); + mlir::Value roundAwayBit = match(_FORTRAN_RUNTIME_IEEE_AWAY); + mlir::Value roundToZero, roundAway, mustAdjust; + fir::IfOp adjustIfOp; + mlir::Value aLtB; + if (aIntType) + aLtB = builder.create( + loc, mlir::arith::CmpIPredicate::slt, a, b); + else + aLtB = builder.create( + loc, mlir::arith::CmpFPredicate::OLT, a, b); + mlir::Value upResult = + builder.create(loc, resultCast, one); + mlir::Value downResult = + builder.create(loc, resultCast, one); + + // (a < b): r is inexact -- return r or ieee_next_down(r) + fir::IfOp ifOp2 = builder.create(loc, resultType, aLtB, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&ifOp2.getThenRegion().front()); + roundToZero = + builder.create(loc, roundToZeroBit, aIsPositive); + roundAway = + builder.create(loc, roundAwayBit, aIsNegative); + mlir::Value roundDown = match(_FORTRAN_RUNTIME_IEEE_DOWN); + mustAdjust = + builder.create(loc, roundToNearest, roundToZero); + mustAdjust = builder.create(loc, mustAdjust, roundAway); + mustAdjust = builder.create(loc, mustAdjust, roundDown); + adjustIfOp = builder.create(loc, resultType, mustAdjust, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&adjustIfOp.getThenRegion().front()); + if (resultType.isF80()) + r1 = fir::runtime::genNearest(builder, loc, r, + builder.createBool(loc, false)); + else + r1 = builder.create( + loc, resultType, + builder.create(loc, aIsNegative, upResult, + downResult)); + builder.create(loc, r1); + builder.setInsertionPointToStart(&adjustIfOp.getElseRegion().front()); + builder.create(loc, r); + builder.setInsertionPointAfter(adjustIfOp); + builder.create(loc, adjustIfOp.getResult(0)); + + // (a > b): r is inexact -- return r or ieee_next_up(r) + builder.setInsertionPointToStart(&ifOp2.getElseRegion().front()); + roundToZero = + builder.create(loc, roundToZeroBit, aIsNegative); + roundAway = + builder.create(loc, roundAwayBit, aIsPositive); + mlir::Value roundUp = match(_FORTRAN_RUNTIME_IEEE_UP); + mustAdjust = + builder.create(loc, roundToNearest, roundToZero); + mustAdjust = builder.create(loc, mustAdjust, roundAway); + mustAdjust = builder.create(loc, mustAdjust, roundUp); + adjustIfOp = builder.create(loc, resultType, mustAdjust, + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&adjustIfOp.getThenRegion().front()); + if (resultType.isF80()) + r1 = fir::runtime::genNearest(builder, loc, r, + builder.createBool(loc, true)); + else + r1 = builder.create( + loc, resultType, + builder.create(loc, aIsPositive, upResult, + downResult)); + builder.create(loc, r1); + builder.setInsertionPointToStart(&adjustIfOp.getElseRegion().front()); + builder.create(loc, r); + builder.setInsertionPointAfter(adjustIfOp); + builder.create(loc, adjustIfOp.getResult(0)); + + // Generate exceptions for (a < b) and (a > b) branches. + builder.setInsertionPointAfter(ifOp2); + r = ifOp2.getResult(0); + fir::IfOp exceptIfOp1 = builder.create( + loc, genIsFPClass(i1Ty, r, infiniteTest), /*withElseRegion=*/true); + builder.setInsertionPointToStart(&exceptIfOp1.getThenRegion().front()); + genRaiseExcept(_FORTRAN_RUNTIME_IEEE_OVERFLOW | + _FORTRAN_RUNTIME_IEEE_INEXACT); + builder.setInsertionPointToStart(&exceptIfOp1.getElseRegion().front()); + fir::IfOp exceptIfOp2 = builder.create( + loc, genIsFPClass(i1Ty, r, subnormalTest | zeroTest), + /*withElseRegion=*/true); + builder.setInsertionPointToStart(&exceptIfOp2.getThenRegion().front()); + genRaiseExcept(_FORTRAN_RUNTIME_IEEE_UNDERFLOW | + _FORTRAN_RUNTIME_IEEE_INEXACT); + builder.setInsertionPointToStart(&exceptIfOp2.getElseRegion().front()); + genRaiseExcept(_FORTRAN_RUNTIME_IEEE_INEXACT); + builder.setInsertionPointAfter(exceptIfOp1); + builder.create(loc, ifOp2.getResult(0)); + builder.setInsertionPointAfter(ifOp1); + return ifOp1.getResult(0); +} + // IEEE_RINT mlir::Value IntrinsicLibrary::genIeeeRint(mlir::Type resultType, llvm::ArrayRef args) { diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index e6eeb0d5db4a84..4c8c56e0f21cef 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2949,9 +2949,10 @@ struct LoadOpConversion : public fir::FIROpConversion { llvm::LogicalResult matchAndRewrite(fir::LoadOp load, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Type llvmLoadTy = convertObjectType(load.getType()); if (auto boxTy = mlir::dyn_cast(load.getType())) { - // fir.box is a special case because it is considered as an ssa values in + // fir.box is a special case because it is considered an ssa value in // fir, but it is lowered as a pointer to a descriptor. So // fir.ref and fir.box end up being the same llvm types and // loading a fir.ref is implemented as taking a snapshot of the @@ -2960,30 +2961,17 @@ struct LoadOpConversion : public fir::FIROpConversion { mlir::Location loc = load.getLoc(); auto newBoxStorage = genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter); - // TODO: always generate llvm.memcpy, LLVM is better at optimizing it than - // aggregate loads + stores. - if (boxTy.isAssumedRank()) { - - TypePair boxTypePair{boxTy, llvmLoadTy}; - mlir::Value boxSize = - computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter); - auto memcpy = rewriter.create( - loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false); - if (std::optional optionalTag = load.getTbaa()) - memcpy.setTBAATags(*optionalTag); - else - attachTBAATag(memcpy, boxTy, boxTy, nullptr); - } else { - auto boxValue = rewriter.create(loc, llvmLoadTy, - inputBoxStorage); - if (std::optional optionalTag = load.getTbaa()) - boxValue.setTBAATags(*optionalTag); - else - attachTBAATag(boxValue, boxTy, boxTy, nullptr); - auto storeOp = - rewriter.create(loc, boxValue, newBoxStorage); - attachTBAATag(storeOp, boxTy, boxTy, nullptr); - } + + TypePair boxTypePair{boxTy, llvmLoadTy}; + mlir::Value boxSize = + computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter); + auto memcpy = rewriter.create( + loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false); + + if (std::optional optionalTag = load.getTbaa()) + memcpy.setTBAATags(*optionalTag); + else + attachTBAATag(memcpy, boxTy, boxTy, nullptr); rewriter.replaceOp(load, newBoxStorage); } else { auto loadOp = rewriter.create( @@ -3227,20 +3215,13 @@ struct StoreOpConversion : public fir::FIROpConversion { mlir::LLVM::AliasAnalysisOpInterface newOp; if (auto boxTy = mlir::dyn_cast(storeTy)) { mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy); - // fir.box value is actually in memory, load it first before storing it, - // or do a memcopy for assumed-rank descriptors. - if (boxTy.isAssumedRank()) { - TypePair boxTypePair{boxTy, llvmBoxTy}; - mlir::Value boxSize = - computeBoxSize(loc, boxTypePair, llvmValue, rewriter); - newOp = rewriter.create( - loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false); - } else { - auto val = - rewriter.create(loc, llvmBoxTy, llvmValue); - attachTBAATag(val, boxTy, boxTy, nullptr); - newOp = rewriter.create(loc, val, llvmMemref); - } + // Always use memcpy because LLVM is not as effective at optimizing + // aggregate loads/stores as it is optimizing memcpy. + TypePair boxTypePair{boxTy, llvmBoxTy}; + mlir::Value boxSize = + computeBoxSize(loc, boxTypePair, llvmValue, rewriter); + newOp = rewriter.create( + loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false); } else { newOp = rewriter.create(loc, llvmValue, llvmMemref); } diff --git a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp index 4da06be8ef7dd9..7cdb2f7ffe27d9 100644 --- a/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp +++ b/flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp @@ -6,15 +6,23 @@ // //===----------------------------------------------------------------------===// +#include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/Runtime/RTBuilder.h" +#include "flang/Optimizer/Builder/Todo.h" +#include "flang/Optimizer/CodeGen/Target.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/Support/DataLayout.h" #include "flang/Optimizer/Transforms/CUFCommon.h" +#include "flang/Runtime/CUDA/registration.h" #include "flang/Runtime/entry-names.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Value.h" #include "mlir/Pass/Pass.h" #include "llvm/ADT/SmallVector.h" @@ -23,6 +31,8 @@ namespace fir { #include "flang/Optimizer/Transforms/Passes.h.inc" } // namespace fir +using namespace Fortran::runtime::cuda; + namespace { static constexpr llvm::StringRef cudaFortranCtorName{ @@ -34,13 +44,23 @@ struct CUFAddConstructor void runOnOperation() override { mlir::ModuleOp mod = getOperation(); mlir::SymbolTable symTab(mod); - mlir::OpBuilder builder{mod.getBodyRegion()}; + mlir::OpBuilder opBuilder{mod.getBodyRegion()}; + fir::FirOpBuilder builder(opBuilder, mod); + fir::KindMapping kindMap{fir::getKindMapping(mod)}; builder.setInsertionPointToEnd(mod.getBody()); mlir::Location loc = mod.getLoc(); auto *ctx = mod.getContext(); auto voidTy = mlir::LLVM::LLVMVoidType::get(ctx); + auto idxTy = builder.getIndexType(); auto funcTy = mlir::LLVM::LLVMFunctionType::get(voidTy, {}, /*isVarArg=*/false); + std::optional dl = + fir::support::getOrSetDataLayout(mod, /*allowDefaultLayout=*/false); + if (!dl) { + mlir::emitError(mod.getLoc(), + "data layout attribute is required to perform " + + getName() + "pass"); + } // Symbol reference to CUFRegisterAllocator. builder.setInsertionPointToEnd(mod.getBody()); @@ -58,12 +78,13 @@ struct CUFAddConstructor builder.setInsertionPointToStart(func.addEntryBlock(builder)); builder.create(loc, funcTy, cufRegisterAllocatorRef); - // Register kernels auto gpuMod = symTab.lookup(cudaDeviceModuleName); if (gpuMod) { auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(ctx); auto registeredMod = builder.create( loc, llvmPtrTy, mlir::SymbolRefAttr::get(ctx, gpuMod.getName())); + + // Register kernels for (auto func : gpuMod.getOps()) { if (func.isKernel()) { auto kernelName = mlir::SymbolRefAttr::get( @@ -72,12 +93,55 @@ struct CUFAddConstructor builder.create(loc, kernelName, registeredMod); } } + + // Register variables + for (fir::GlobalOp globalOp : mod.getOps()) { + auto attr = globalOp.getDataAttrAttr(); + if (!attr) + continue; + + mlir::func::FuncOp func; + switch (attr.getValue()) { + case cuf::DataAttribute::Device: + case cuf::DataAttribute::Constant: { + func = fir::runtime::getRuntimeFunc( + loc, builder); + auto fTy = func.getFunctionType(); + + // Global variable name + std::string gblNameStr = globalOp.getSymbol().getValue().str(); + gblNameStr += '\0'; + mlir::Value gblName = fir::getBase( + fir::factory::createStringLiteral(builder, loc, gblNameStr)); + + // Global variable size + auto sizeAndAlign = fir::getTypeSizeAndAlignmentOrCrash( + loc, globalOp.getType(), *dl, kindMap); + auto size = + builder.createIntegerConstant(loc, idxTy, sizeAndAlign.first); + + // Global variable address + mlir::Value addr = builder.create( + loc, globalOp.resultType(), globalOp.getSymbol()); + + llvm::SmallVector args{fir::runtime::createArguments( + builder, loc, fTy, registeredMod, addr, gblName, size)}; + builder.create(loc, func, args); + } break; + case cuf::DataAttribute::Managed: + TODO(loc, "registration of managed variables"); + default: + break; + } + if (!func) + continue; + } } builder.create(loc, mlir::ValueRange{}); // Create the llvm.global_ctor with the function. - // TODO: We might want to have a utility that retrieve it if already created - // and adds new functions. + // TODO: We might want to have a utility that retrieve it if already + // created and adds new functions. builder.setInsertionPointToEnd(mod.getBody()); llvm::SmallVector funcs; funcs.push_back( diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 9c2b882c7f46fe..e3e441360e949b 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -15,6 +15,7 @@ #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" +#include "flang/Optimizer/Transforms/CUFCommon.h" #include "flang/Runtime/CUDA/allocatable.h" #include "flang/Runtime/CUDA/common.h" #include "flang/Runtime/CUDA/descriptor.h" @@ -111,7 +112,7 @@ mlir::Value getDeviceAddress(mlir::PatternRewriter &rewriter, switch (attr.getValue()) { case cuf::DataAttribute::Device: case cuf::DataAttribute::Managed: - case cuf::DataAttribute::Pinned: + case cuf::DataAttribute::Constant: isDevGlobal = true; break; default: @@ -172,7 +173,7 @@ static mlir::LogicalResult convertOpToCall(OpTy op, return mlir::success(); } -struct CufAllocateOpConversion +struct CUFAllocateOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -215,7 +216,7 @@ struct CufAllocateOpConversion } }; -struct CufDeallocateOpConversion +struct CUFDeallocateOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -283,10 +284,10 @@ static int computeWidth(mlir::Location loc, mlir::Type type, return width; } -struct CufAllocOpConversion : public mlir::OpRewritePattern { +struct CUFAllocOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; - CufAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, + CUFAllocOpConversion(mlir::MLIRContext *context, mlir::DataLayout *dl, const fir::LLVMTypeConverter *typeConverter) : OpRewritePattern(context), dl{dl}, typeConverter{typeConverter} {} @@ -379,7 +380,7 @@ struct CufAllocOpConversion : public mlir::OpRewritePattern { const fir::LLVMTypeConverter *typeConverter; }; -struct CufFreeOpConversion : public mlir::OpRewritePattern { +struct CUFFreeOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; mlir::LogicalResult @@ -428,11 +429,11 @@ struct CufFreeOpConversion : public mlir::OpRewritePattern { } }; -struct CufDataTransferOpConversion +struct CUFDataTransferOpConversion : public mlir::OpRewritePattern { using OpRewritePattern::OpRewritePattern; - CufDataTransferOpConversion(mlir::MLIRContext *context, + CUFDataTransferOpConversion(mlir::MLIRContext *context, const mlir::SymbolTable &symtab) : OpRewritePattern(context), symtab{symtab} {} @@ -528,8 +529,8 @@ struct CufDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(4)); - mlir::Value dst = builder.loadIfRef(loc, op.getDst()); - mlir::Value src = builder.loadIfRef(loc, op.getSrc()); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{fir::runtime::createArguments( builder, loc, fTy, dst, src, modeValue, sourceFile, sourceLine)}; builder.create(loc, func, args); @@ -602,11 +603,8 @@ struct CufDataTransferOpConversion mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); mlir::Value sourceLine = fir::factory::locationToLineNo(builder, loc, fTy.getInput(5)); - mlir::Value dst = - dstIsDesc ? builder.loadIfRef(loc, op.getDst()) : op.getDst(); - mlir::Value src = mlir::isa(srcTy) - ? builder.loadIfRef(loc, op.getSrc()) - : op.getSrc(); + mlir::Value dst = op.getDst(); + mlir::Value src = op.getSrc(); llvm::SmallVector args{ fir::runtime::createArguments(builder, loc, fTy, dst, src, bytes, modeValue, sourceFile, sourceLine)}; @@ -620,6 +618,69 @@ struct CufDataTransferOpConversion const mlir::SymbolTable &symtab; }; +struct CUFLaunchOpConversion + : public mlir::OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + CUFLaunchOpConversion(mlir::MLIRContext *context, + const mlir::SymbolTable &symTab) + : OpRewritePattern(context), symTab{symTab} {} + + mlir::LogicalResult + matchAndRewrite(cuf::KernelLaunchOp op, + mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = op.getLoc(); + auto idxTy = mlir::IndexType::get(op.getContext()); + auto zero = rewriter.create( + loc, rewriter.getIntegerType(32), rewriter.getI32IntegerAttr(0)); + auto gridSizeX = + rewriter.create(loc, idxTy, op.getGridX()); + auto gridSizeY = + rewriter.create(loc, idxTy, op.getGridY()); + auto gridSizeZ = + rewriter.create(loc, idxTy, op.getGridZ()); + auto blockSizeX = + rewriter.create(loc, idxTy, op.getBlockX()); + auto blockSizeY = + rewriter.create(loc, idxTy, op.getBlockY()); + auto blockSizeZ = + rewriter.create(loc, idxTy, op.getBlockZ()); + auto kernelName = mlir::SymbolRefAttr::get( + rewriter.getStringAttr(cudaDeviceModuleName), + {mlir::SymbolRefAttr::get( + rewriter.getContext(), + op.getCallee().getLeafReference().getValue())}); + mlir::Value clusterDimX, clusterDimY, clusterDimZ; + if (auto funcOp = symTab.lookup( + op.getCallee().getLeafReference())) { + if (auto clusterDimsAttr = funcOp->getAttrOfType( + cuf::getClusterDimsAttrName())) { + clusterDimX = rewriter.create( + loc, clusterDimsAttr.getX().getInt()); + clusterDimY = rewriter.create( + loc, clusterDimsAttr.getY().getInt()); + clusterDimZ = rewriter.create( + loc, clusterDimsAttr.getZ().getInt()); + } + } + auto gpuLaunchOp = rewriter.create( + loc, kernelName, mlir::gpu::KernelDim3{gridSizeX, gridSizeY, gridSizeZ}, + mlir::gpu::KernelDim3{blockSizeX, blockSizeY, blockSizeZ}, zero, + op.getArgs()); + if (clusterDimX && clusterDimY && clusterDimZ) { + gpuLaunchOp.getClusterSizeXMutable().assign(clusterDimX); + gpuLaunchOp.getClusterSizeYMutable().assign(clusterDimY); + gpuLaunchOp.getClusterSizeZMutable().assign(clusterDimZ); + } + rewriter.replaceOp(op, gpuLaunchOp); + return mlir::success(); + } + +private: + const mlir::SymbolTable &symTab; +}; + class CUFOpConversion : public fir::impl::CUFOpConversionBase { public: void runOnOperation() override { @@ -637,7 +698,8 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase { fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false); fir::LLVMTypeConverter typeConverter(module, /*applyTBAA=*/false, /*forceUnifiedTBAATree=*/false, *dl); - target.addLegalDialect(); + target.addLegalDialect(); cuf::populateCUFToFIRConversionPatterns(typeConverter, *dl, symtab, patterns); if (mlir::failed(mlir::applyPartialConversion(getOperation(), target, @@ -653,8 +715,9 @@ class CUFOpConversion : public fir::impl::CUFOpConversionBase { void cuf::populateCUFToFIRConversionPatterns( const fir::LLVMTypeConverter &converter, mlir::DataLayout &dl, const mlir::SymbolTable &symtab, mlir::RewritePatternSet &patterns) { - patterns.insert(patterns.getContext(), &dl, &converter); - patterns.insert(patterns.getContext()); - patterns.insert(patterns.getContext(), symtab); + patterns.insert(patterns.getContext(), &dl, &converter); + patterns.insert(patterns.getContext()); + patterns.insert( + patterns.getContext(), symtab); } diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp index 8e516734a90879..a070c87137fa16 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.cpp @@ -271,6 +271,19 @@ static bool canCacheThisType(mlir::LLVM::DICompositeTypeAttr comTy) { return true; } +std::pair +DebugTypeGenerator::getFieldSizeAndAlign(mlir::Type fieldTy) { + mlir::Type llvmTy; + if (auto boxTy = mlir::dyn_cast_or_null(fieldTy)) + llvmTy = llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy)); + else + llvmTy = llvmTypeConverter.convertType(fieldTy); + + uint64_t byteSize = dataLayout->getTypeSize(llvmTy); + unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy); + return std::pair{byteSize, byteAlign}; +} + mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( fir::RecordType Ty, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { @@ -303,15 +316,7 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( mlir::IntegerType intTy = mlir::IntegerType::get(context, 64); std::uint64_t offset = 0; for (auto [fieldName, fieldTy] : Ty.getTypeList()) { - mlir::Type llvmTy; - if (auto boxTy = mlir::dyn_cast_or_null(fieldTy)) - llvmTy = - llvmTypeConverter.convertBoxTypeAsStruct(boxTy, getBoxRank(boxTy)); - else - llvmTy = llvmTypeConverter.convertType(fieldTy); - - uint64_t byteSize = dataLayout->getTypeSize(llvmTy); - unsigned short byteAlign = dataLayout->getTypeABIAlignment(llvmTy); + auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy); std::optional> lowerBounds = fir::getComponentLowerBoundsIfNonDefault(Ty, fieldName, module, symbolTable); @@ -368,6 +373,42 @@ mlir::LLVM::DITypeAttr DebugTypeGenerator::convertRecordType( return finalAttr; } +mlir::LLVM::DITypeAttr DebugTypeGenerator::convertTupleType( + mlir::TupleType Ty, mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { + // Check if this type has already been converted. + auto iter = typeCache.find(Ty); + if (iter != typeCache.end()) + return iter->second; + + llvm::SmallVector elements; + mlir::MLIRContext *context = module.getContext(); + + std::uint64_t offset = 0; + for (auto fieldTy : Ty.getTypes()) { + auto [byteSize, byteAlign] = getFieldSizeAndAlign(fieldTy); + mlir::LLVM::DITypeAttr elemTy = + convertType(fieldTy, fileAttr, scope, /*declOp=*/nullptr); + offset = llvm::alignTo(offset, byteAlign); + mlir::LLVM::DIDerivedTypeAttr tyAttr = mlir::LLVM::DIDerivedTypeAttr::get( + context, llvm::dwarf::DW_TAG_member, mlir::StringAttr::get(context, ""), + elemTy, byteSize * 8, byteAlign * 8, offset * 8, + /*optional
=*/std::nullopt, + /*extra data=*/nullptr); + elements.push_back(tyAttr); + offset += llvm::alignTo(byteSize, byteAlign); + } + + auto typeAttr = mlir::LLVM::DICompositeTypeAttr::get( + context, llvm::dwarf::DW_TAG_structure_type, + mlir::StringAttr::get(context, ""), fileAttr, /*line=*/0, scope, + /*baseType=*/nullptr, mlir::LLVM::DIFlags::Zero, offset * 8, + /*alignInBits=*/0, elements, /*dataLocation=*/nullptr, /*rank=*/nullptr, + /*allocated=*/nullptr, /*associated=*/nullptr); + typeCache[Ty] = typeAttr; + return typeAttr; +} + mlir::LLVM::DITypeAttr DebugTypeGenerator::convertSequenceType( fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp) { @@ -574,6 +615,8 @@ DebugTypeGenerator::convertType(mlir::Type Ty, mlir::LLVM::DIFileAttr fileAttr, /*hasDescriptor=*/false); } else if (auto recTy = mlir::dyn_cast_or_null(Ty)) { return convertRecordType(recTy, fileAttr, scope, declOp); + } else if (auto tupleTy = mlir::dyn_cast_if_present(Ty)) { + return convertTupleType(tupleTy, fileAttr, scope, declOp); } else if (auto refTy = mlir::dyn_cast_if_present(Ty)) { auto elTy = refTy.getEleTy(); return convertPointerLikeType(elTy, fileAttr, scope, declOp, diff --git a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h index eeefb6c463d936..c1fce4bdae5ce5 100644 --- a/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h +++ b/flang/lib/Optimizer/Transforms/DebugTypeGenerator.h @@ -39,6 +39,10 @@ class DebugTypeGenerator { mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp); + mlir::LLVM::DITypeAttr convertTupleType(mlir::TupleType Ty, + mlir::LLVM::DIFileAttr fileAttr, + mlir::LLVM::DIScopeAttr scope, + fir::cg::XDeclareOp declOp); mlir::LLVM::DITypeAttr convertSequenceType(fir::SequenceType seqTy, mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, @@ -73,6 +77,8 @@ class DebugTypeGenerator { mlir::LLVM::DIFileAttr fileAttr, mlir::LLVM::DIScopeAttr scope, fir::cg::XDeclareOp declOp); + std::pair + getFieldSizeAndAlign(mlir::Type fieldTy); mlir::ModuleOp module; mlir::SymbolTable *symbolTable; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 598439cbee87e6..6fde70fc5c3878 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -366,9 +366,12 @@ TYPE_PARSER( construct(name, maybe(Parser{}))) TYPE_PARSER(construct( + "DEPOBJ" >> pure(OmpTaskDependenceType::Type::Depobj) || "IN"_id >> pure(OmpTaskDependenceType::Type::In) || "INOUT" >> pure(OmpTaskDependenceType::Type::Inout) || - "OUT" >> pure(OmpTaskDependenceType::Type::Out))) + "OUT" >> pure(OmpTaskDependenceType::Type::Out) || + "SINK" >> pure(OmpTaskDependenceType::Type::Sink) || + "SOURCE" >> pure(OmpTaskDependenceType::Type::Source))) TYPE_CONTEXT_PARSER("Omp Depend clause"_en_US, construct(construct( @@ -454,6 +457,9 @@ TYPE_PARSER( parenthesized(Parser{}))) || "DEPEND" >> construct(construct( parenthesized(Parser{}))) || + "DESTROY" >> + construct(construct(maybe(parenthesized( + construct(Parser{}))))) || "DEVICE" >> construct(construct( parenthesized(Parser{}))) || "DEVICE_TYPE" >> construct(construct( @@ -560,7 +566,9 @@ TYPE_PARSER( construct(construct()) || "UNIFORM" >> construct(construct( parenthesized(nonemptyList(name)))) || - "UNTIED" >> construct(construct())) + "UNTIED" >> construct(construct()) || + "UPDATE" >> construct(construct( + parenthesized(Parser{})))) // [Clause, [Clause], ...] TYPE_PARSER(sourced(construct( @@ -583,12 +591,19 @@ TYPE_PARSER(sourced(construct(first( "MASKED TASKLOOP SIMD" >> pure(llvm::omp::Directive::OMPD_masked_taskloop_simd), "MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_masked_taskloop), + "MASTER TASKLOOP SIMD" >> + pure(llvm::omp::Directive::OMPD_master_taskloop_simd), + "MASTER TASKLOOP" >> pure(llvm::omp::Directive::OMPD_master_taskloop), "PARALLEL DO SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_do_simd), "PARALLEL DO" >> pure(llvm::omp::Directive::OMPD_parallel_do), "PARALLEL MASKED TASKLOOP SIMD" >> pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd), "PARALLEL MASKED TASKLOOP" >> pure(llvm::omp::Directive::OMPD_parallel_masked_taskloop), + "PARALLEL MASTER TASKLOOP SIMD" >> + pure(llvm::omp::Directive::OMPD_parallel_master_taskloop_simd), + "PARALLEL MASTER TASKLOOP" >> + pure(llvm::omp::Directive::OMPD_parallel_master_taskloop), "SIMD" >> pure(llvm::omp::Directive::OMPD_simd), "TARGET LOOP" >> pure(llvm::omp::Directive::OMPD_target_loop), "TARGET PARALLEL DO SIMD" >> @@ -673,6 +688,9 @@ TYPE_PARSER(sourced(construct( TYPE_PARSER(sourced(construct( many(maybe(","_tok) >> sourced(Parser{}))))) +TYPE_PARSER(sourced(construct(verbatim("DEPOBJ"_tok), + parenthesized(Parser{}), sourced(Parser{})))) + TYPE_PARSER(sourced(construct(verbatim("FLUSH"_tok), many(maybe(","_tok) >> sourced(Parser{})), maybe(parenthesized(Parser{}))))) @@ -697,7 +715,8 @@ TYPE_PARSER( construct(Parser{}) || construct(Parser{}) || construct( - Parser{})) / + Parser{}) || + construct(Parser{})) / endOfLine) // Directives enclosing structured-block @@ -706,6 +725,7 @@ TYPE_PARSER(construct(first( "MASTER" >> pure(llvm::omp::Directive::OMPD_master), "ORDERED" >> pure(llvm::omp::Directive::OMPD_ordered), "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked), + "PARALLEL MASTER" >> pure(llvm::omp::Directive::OMPD_parallel_master), "PARALLEL WORKSHARE" >> pure(llvm::omp::Directive::OMPD_parallel_workshare), "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel), "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope), diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp index 948ad04a091a8c..60aef1666e9ba7 100644 --- a/flang/lib/Parser/parse-tree.cpp +++ b/flang/lib/Parser/parse-tree.cpp @@ -252,6 +252,23 @@ CharBlock Variable::GetSource() const { llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Name &x) { return os << x.ToString(); } + +OmpTaskDependenceType::Type OmpDependClause::GetDepType() const { + return common::visit( + common::visitors{ + [&](const parser::OmpDependClause::Source &) { + return parser::OmpTaskDependenceType::Type::Source; + }, + [&](const parser::OmpDependClause::Sink &) { + return parser::OmpTaskDependenceType::Type::Sink; + }, + [&](const parser::OmpDependClause::InOut &y) { + return std::get(y.t).v; + }, + }, + u); +} + } // namespace Fortran::parser template static llvm::omp::Clause getClauseIdForClass(C &&) { diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 39fcb61609e33b..3b0824f80161f4 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2215,11 +2215,9 @@ class UnparseVisitor { Walk(std::get>(x.t)); } void Unparse(const OmpDependClause::InOut &x) { - Put("("); Walk(std::get(x.t)); Put(":"); Walk(std::get(x.t)); - Put(")"); } bool Pre(const OmpDependClause &x) { return common::visit( @@ -2274,6 +2272,12 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_masked_taskloop: Word("MASKED TASKLOOP"); break; + case llvm::omp::Directive::OMPD_master_taskloop_simd: + Word("MASTER TASKLOOP SIMD"); + break; + case llvm::omp::Directive::OMPD_master_taskloop: + Word("MASTER TASKLOOP"); + break; case llvm::omp::Directive::OMPD_parallel_do: Word("PARALLEL DO "); break; @@ -2286,6 +2290,12 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel_masked_taskloop: Word("PARALLEL MASKED TASKLOOP"); break; + case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd: + Word("PARALLEL MASTER TASKLOOP SIMD"); + break; + case llvm::omp::Directive::OMPD_parallel_master_taskloop: + Word("PARALLEL MASTER TASKLOOP"); + break; case llvm::omp::Directive::OMPD_simd: Word("SIMD "); break; @@ -2390,6 +2400,9 @@ class UnparseVisitor { case llvm::omp::Directive::OMPD_parallel_masked: Word("PARALLEL MASKED"); break; + case llvm::omp::Directive::OMPD_parallel_master: + Word("PARALLEL MASTER"); + break; case llvm::omp::Directive::OMPD_parallel_workshare: Word("PARALLEL WORKSHARE "); break; @@ -2706,6 +2719,16 @@ class UnparseVisitor { }, x.u); } + void Unparse(const OpenMPDepobjConstruct &x) { + BeginOpenMP(); + Word("!$OMP DEPOBJ"); + Put("("); + Walk(std::get(x.t)); + Put(") "); + Walk(std::get(x.t)); + Put("\n"); + EndOpenMP(); + } void Unparse(const OpenMPFlushConstruct &x) { BeginOpenMP(); Word("!$OMP FLUSH "); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 8f3eb9fefee678..c813100b4b16c8 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1261,6 +1261,39 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareSimdConstruct &) { dirContext_.pop_back(); } +void OmpStructureChecker::Enter(const parser::OpenMPDepobjConstruct &x) { + const auto &dir{std::get(x.t)}; + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_depobj); + + // [5.2:73:27-28] + // If the destroy clause appears on a depobj construct, destroy-var must + // refer to the same depend object as the depobj argument of the construct. + auto &clause{std::get(x.t)}; + if (clause.Id() == llvm::omp::Clause::OMPC_destroy) { + auto getSymbol{[&](const parser::OmpObject &obj) { + return common::visit( + [&](auto &&s) { return GetLastName(s).symbol; }, obj.u); + }}; + + auto &wrapper{std::get(clause.u)}; + if (const std::optional &destroy{wrapper.v}) { + const Symbol *constrSym{getSymbol(std::get(x.t))}; + const Symbol *clauseSym{getSymbol(destroy->v)}; + assert(constrSym && "Unresolved depobj construct symbol"); + assert(clauseSym && "Unresolved destroy symbol on depobj construct"); + if (constrSym != clauseSym) { + context_.Say(x.source, + "The DESTROY clause must refer to the same object as the " + "DEPOBJ construct"_err_en_US); + } + } + } +} + +void OmpStructureChecker::Leave(const parser::OpenMPDepobjConstruct &x) { + dirContext_.pop_back(); +} + void OmpStructureChecker::Enter(const parser::OpenMPRequiresConstruct &x) { const auto &dir{std::get(x.t)}; PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_requires); @@ -2476,7 +2509,6 @@ CHECK_SIMPLE_CLAUSE(Capture, OMPC_capture) CHECK_SIMPLE_CLAUSE(Contains, OMPC_contains) CHECK_SIMPLE_CLAUSE(Default, OMPC_default) CHECK_SIMPLE_CLAUSE(Depobj, OMPC_depobj) -CHECK_SIMPLE_CLAUSE(Destroy, OMPC_destroy) CHECK_SIMPLE_CLAUSE(Detach, OMPC_detach) CHECK_SIMPLE_CLAUSE(DeviceType, OMPC_device_type) CHECK_SIMPLE_CLAUSE(DistSchedule, OMPC_dist_schedule) @@ -2519,7 +2551,6 @@ CHECK_SIMPLE_CLAUSE(Uniform, OMPC_uniform) CHECK_SIMPLE_CLAUSE(Unknown, OMPC_unknown) CHECK_SIMPLE_CLAUSE(Untied, OMPC_untied) CHECK_SIMPLE_CLAUSE(UsesAllocators, OMPC_uses_allocators) -CHECK_SIMPLE_CLAUSE(Update, OMPC_update) CHECK_SIMPLE_CLAUSE(Write, OMPC_write) CHECK_SIMPLE_CLAUSE(Init, OMPC_init) CHECK_SIMPLE_CLAUSE(Use, OMPC_use) @@ -2555,6 +2586,22 @@ CHECK_REQ_CONSTANT_SCALAR_INT_CLAUSE(Simdlen, OMPC_simdlen) // Restrictions specific to each clause are implemented apart from the // generalized restrictions. + +void OmpStructureChecker::Enter(const parser::OmpClause::Destroy &x) { + CheckAllowedClause(llvm::omp::Clause::OMPC_destroy); + + llvm::omp::Directive dir{GetContext().directive}; + unsigned version{context_.langOptions().OpenMPVersion}; + if (dir == llvm::omp::Directive::OMPD_depobj) { + if (version < 52) { + context_.Say(GetContext().clauseSource, + "The object parameter in DESTROY clause in DEPOPJ construct " + "was introduced in %s"_port_en_US, + ThisVersion(52)); + } + } +} + void OmpStructureChecker::Enter(const parser::OmpClause::Reduction &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_reduction); if (CheckReductionOperators(x)) { @@ -3285,16 +3332,63 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Device &x) { void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_depend); - if ((std::holds_alternative(x.v.u) || - std::holds_alternative(x.v.u)) && - GetContext().directive != llvm::omp::OMPD_ordered) { - context_.Say(GetContext().clauseSource, - "DEPEND(SOURCE) or DEPEND(SINK : vec) can be used only with the ordered" - " directive. Used here in the %s construct."_err_en_US, - parser::ToUpperCaseLetters(getDirectiveName(GetContext().directive))); + llvm::omp::Directive directive{GetContext().directive}; + unsigned version{context_.langOptions().OpenMPVersion}; + + using DepType = parser::OmpTaskDependenceType::Type; + DepType depType = x.v.GetDepType(); + + if (version >= 52) { + switch (depType) { + case DepType::Sink: + case DepType::Source: + context_.Say(GetContext().clauseSource, + "The %s task-dependence-type is deprecated in %s"_warn_en_US, + parser::ToUpperCaseLetters( + parser::OmpTaskDependenceType::EnumToString(depType)), + ThisVersion(version)); + break; + default: + break; + } + } + + if (directive == llvm::omp::OMPD_depobj) { + // [5.0:255:11], [5.1:288:3] + // A depend clause on a depobj construct must not have source, sink [or + // depobj](5.0) as dependence-type. + if (version >= 50) { + bool invalidDep{depType == DepType::Source || depType == DepType::Sink}; + if (version == 50) { + invalidDep = invalidDep || depType == DepType::Depobj; + } + if (invalidDep) { + context_.Say(GetContext().clauseSource, + "A DEPEND clause on a DEPOBJ construct must not have SOURCE%s " + "as dependence-type"_err_en_US, + version == 50 ? ", SINK or DEPOBJ" : " or SINK"); + } + } + } else if (directive != llvm::omp::OMPD_ordered) { + if (depType == DepType::Source || depType == DepType::Sink) { + context_.Say(GetContext().clauseSource, + "DEPEND(SOURCE) or DEPEND(SINK : vec) can be used only with the " + "ordered directive. Used here in the %s construct."_err_en_US, + parser::ToUpperCaseLetters(getDirectiveName(directive))); + } } if (const auto *inOut{std::get_if(&x.v.u)}) { - for (const auto &object : std::get(inOut->t).v) { + auto &objList{std::get(inOut->t)}; + if (directive == llvm::omp::OMPD_depobj) { + // [5.0:255:13], [5.1:288:6], [5.2:322:26] + // A depend clause on a depobj construct must only specify one locator. + if (objList.v.size() != 1) { + context_.Say(GetContext().clauseSource, + "A DEPEND clause on a DEPOBJ construct must only specify " + "one locator"_err_en_US); + } + } + for (const auto &object : objList.v) { if (const auto *name{std::get_if(&object.u)}) { context_.Say(GetContext().clauseSource, "Common block name ('%s') cannot appear in a DEPEND " @@ -3313,12 +3407,18 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Depend &x) { } } if (std::get>(inOut->t)) { - unsigned version{context_.langOptions().OpenMPVersion}; unsigned allowedInVersion{50}; if (version < allowedInVersion) { context_.Say(GetContext().clauseSource, "Iterator modifiers are not supported in %s, %s"_warn_en_US, ThisVersion(version), TryVersion(allowedInVersion)); + } else { + if (directive == llvm::omp::OMPD_depobj) { + context_.Say(GetContext().clauseSource, + "An iterator-modifier may specify multiple locators, " + "a DEPEND clause on a DEPOBJ construct must only specify " + "one locator"_warn_en_US); + } } } } @@ -3433,6 +3533,34 @@ void OmpStructureChecker::CheckStructureElement( return; } +void OmpStructureChecker::Enter(const parser::OmpClause::Update &x) { + CheckAllowedClause(llvm::omp::Clause::OMPC_update); + llvm::omp::Directive directive{GetContext().directive}; + unsigned version{context_.langOptions().OpenMPVersion}; + + // [5.1:288:4-5] + // An update clause on a depobj construct must not have source, sink or depobj + // as dependence-type. + // [5.2:322:3] + // task-dependence-type must not be depobj. + if (directive == llvm::omp::OMPD_depobj) { + if (version >= 51) { + // Update -> OmpUpdateClause -> OmpTaskDependenceType -> Type + switch (x.v.v.v) { + case parser::OmpTaskDependenceType::Type::Source: + case parser::OmpTaskDependenceType::Type::Sink: + case parser::OmpTaskDependenceType::Type::Depobj: + context_.Say(GetContext().clauseSource, + "An UPDATE clause on a DEPOBJ construct must not have SOURCE, " + "SINK or DEPOBJ as dependence-type"_err_en_US); + break; + default: + break; + } + } + } +} + void OmpStructureChecker::Enter(const parser::OmpClause::UseDevicePtr &x) { CheckStructureElement(x.v, llvm::omp::Clause::OMPC_use_device_ptr); CheckAllowedClause(llvm::omp::Clause::OMPC_use_device_ptr); @@ -3616,7 +3744,7 @@ void OmpStructureChecker::CheckDependList(const parser::DataRef &d) { context_.Say(GetContext().clauseSource, "Coarrays are not supported in DEPEND clause"_err_en_US); }, - [&](const parser::Name &) { return; }, + [&](const parser::Name &) {}, }, d.u); } diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 237569bc40c483..d5fd558cea2372 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -92,6 +92,8 @@ class OmpStructureChecker void Leave(const parser::OpenMPDeclarativeAllocate &); void Enter(const parser::OpenMPDeclareTargetConstruct &); void Leave(const parser::OpenMPDeclareTargetConstruct &); + void Enter(const parser::OpenMPDepobjConstruct &); + void Leave(const parser::OpenMPDepobjConstruct &); void Enter(const parser::OmpDeclareTargetWithList &); void Enter(const parser::OmpDeclareTargetWithClause &); void Leave(const parser::OmpDeclareTargetWithClause &); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 014b7987a658bd..359dac911b8c7c 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -383,6 +383,14 @@ class OmpAttributeVisitor : DirectiveAttributeVisitor { } void Post(const parser::OpenMPDeclareSimdConstruct &) { PopContext(); } + bool Pre(const parser::OpenMPDepobjConstruct &x) { + PushContext(x.source, llvm::omp::Directive::OMPD_depobj); + auto &object{std::get(x.t)}; + ResolveOmpObject(object, Symbol::Flag::OmpDependObject); + return true; + } + void Post(const parser::OpenMPDepobjConstruct &) { PopContext(); } + bool Pre(const parser::OpenMPRequiresConstruct &x) { using Flags = WithOmpDeclarative::RequiresFlags; using Requires = WithOmpDeclarative::RequiresFlag; @@ -1531,6 +1539,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_parallel_masked: case llvm::omp::Directive::OMPD_master: + case llvm::omp::Directive::OMPD_parallel_master: case llvm::omp::Directive::OMPD_ordered: case llvm::omp::Directive::OMPD_parallel: case llvm::omp::Directive::OMPD_scope: @@ -1550,7 +1559,8 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { // TODO others break; } - if (beginDir.v == llvm::omp::Directive::OMPD_master) + if (beginDir.v == llvm::omp::Directive::OMPD_master || + beginDir.v == llvm::omp::Directive::OMPD_parallel_master) IssueNonConformanceWarning(beginDir.v, beginDir.source); ClearDataSharingAttributeObjects(); ClearPrivateDataSharingAttributeObjects(); @@ -1563,7 +1573,9 @@ void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { const auto &beginDir{std::get(beginBlockDir.t)}; switch (beginDir.v) { case llvm::omp::Directive::OMPD_masked: + case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_parallel_masked: + case llvm::omp::Directive::OMPD_parallel_master: case llvm::omp::Directive::OMPD_parallel: case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: @@ -1634,10 +1646,14 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { case llvm::omp::Directive::OMPD_loop: case llvm::omp::Directive::OMPD_masked_taskloop_simd: case llvm::omp::Directive::OMPD_masked_taskloop: + case llvm::omp::Directive::OMPD_master_taskloop_simd: + case llvm::omp::Directive::OMPD_master_taskloop: case llvm::omp::Directive::OMPD_parallel_do: case llvm::omp::Directive::OMPD_parallel_do_simd: case llvm::omp::Directive::OMPD_parallel_masked_taskloop_simd: case llvm::omp::Directive::OMPD_parallel_masked_taskloop: + case llvm::omp::Directive::OMPD_parallel_master_taskloop_simd: + case llvm::omp::Directive::OMPD_parallel_master_taskloop: case llvm::omp::Directive::OMPD_simd: case llvm::omp::Directive::OMPD_target_loop: case llvm::omp::Directive::OMPD_target_parallel_do: @@ -1662,7 +1678,11 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPLoopConstruct &x) { default: break; } - if (beginDir.v == llvm::omp::Directive::OMPD_target_loop) + if (beginDir.v == llvm::omp::OMPD_master_taskloop || + beginDir.v == llvm::omp::OMPD_master_taskloop_simd || + beginDir.v == llvm::omp::OMPD_parallel_master_taskloop || + beginDir.v == llvm::omp::OMPD_parallel_master_taskloop_simd || + beginDir.v == llvm::omp::Directive::OMPD_target_loop) IssueNonConformanceWarning(beginDir.v, beginDir.source); ClearDataSharingAttributeObjects(); SetContextAssociatedLoopLevel(GetAssociatedLoopLevelFromClauses(clauseList)); @@ -2891,18 +2911,39 @@ void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope, void OmpAttributeVisitor::IssueNonConformanceWarning( llvm::omp::Directive D, parser::CharBlock source) { - std::string warnStr = ""; - std::string dirName = llvm::omp::getOpenMPDirectiveName(D).str(); + std::string warnStr; + llvm::raw_string_ostream warnStrOS(warnStr); + warnStrOS << "OpenMP directive " + << parser::ToUpperCaseLetters( + llvm::omp::getOpenMPDirectiveName(D).str()) + << " has been deprecated"; + + auto setAlternativeStr = [&warnStrOS](llvm::StringRef alt) { + warnStrOS << ", please use " << alt << " instead."; + }; switch (D) { case llvm::omp::OMPD_master: - warnStr = "OpenMP directive '" + dirName + - "' has been deprecated, please use 'masked' instead."; + setAlternativeStr("MASKED"); + break; + case llvm::omp::OMPD_master_taskloop: + setAlternativeStr("MASKED TASKLOOP"); + break; + case llvm::omp::OMPD_master_taskloop_simd: + setAlternativeStr("MASKED TASKLOOP SIMD"); + break; + case llvm::omp::OMPD_parallel_master: + setAlternativeStr("PARALLEL MASKED"); + break; + case llvm::omp::OMPD_parallel_master_taskloop: + setAlternativeStr("PARALLEL MASKED TASKLOOP"); + break; + case llvm::omp::OMPD_parallel_master_taskloop_simd: + setAlternativeStr("PARALLEL_MASKED TASKLOOP SIMD"); break; case llvm::omp::OMPD_target_loop: - default: - warnStr = "OpenMP directive '" + dirName + "' has been deprecated."; + default:; } - context_.Warn( - common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, warnStr); + context_.Warn(common::UsageWarning::OpenMPUsage, source, "%s"_warn_en_US, + warnStrOS.str()); } } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index add4e4befd3a2b..e0a8246ebc752e 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1538,6 +1538,13 @@ class OmpVisitor : public virtual DeclarationVisitor { void Post(const parser::OpenMPDeclarativeConstruct &) { messageHandler().set_currStmtSource(std::nullopt); } + bool Pre(const parser::OpenMPDepobjConstruct &x) { + AddOmpSourceRange(x.source); + return true; + } + void Post(const parser::OpenMPDepobjConstruct &x) { + messageHandler().set_currStmtSource(std::nullopt); + } bool Pre(const parser::OpenMPAtomicConstruct &x) { return common::visit(common::visitors{[&](const auto &u) -> bool { AddOmpSourceRange(u.source); diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp index fc48b4343eea9d..4778a4ae77683f 100644 --- a/flang/runtime/CUDA/memory.cpp +++ b/flang/runtime/CUDA/memory.cpp @@ -73,23 +73,22 @@ void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes, CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind)); } -void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr, +void RTDEF(CUFDataTransferDescPtr)(Descriptor *desc, void *addr, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a pointer to a descriptor"); } -void RTDEF(CUFDataTransferPtrDesc)(void *addr, const Descriptor &desc, +void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc, std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer from a descriptor to a pointer"); } -void RTDECL(CUFDataTransferDescDesc)(const Descriptor &dstDesc, - const Descriptor &srcDesc, unsigned mode, const char *sourceFile, - int sourceLine) { +void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc, + unsigned mode, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; terminator.Crash( "not yet implemented: CUDA data transfer between two descriptors"); diff --git a/flang/test/Fir/CUDA/cuda-constructor-2.f90 b/flang/test/Fir/CUDA/cuda-constructor-2.f90 new file mode 100644 index 00000000000000..378dabbb7c7e7d --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-constructor-2.f90 @@ -0,0 +1,22 @@ +// RUN: fir-opt --split-input-file --cuf-add-constructor %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + + fir.global @_QMmtestsEn(dense<[3, 4, 5, 6, 7]> : tensor<5xi32>) {data_attr = #cuf.cuda} : !fir.array<5xi32> + + gpu.module @cuda_device_mod [#nvvm.target] { + } +} + +// CHECK: gpu.module @cuda_device_mod [#nvvm.target] + +// CHECK: llvm.func internal @__cudaFortranConstructor() { +// CHECK-DAG: %[[MODULE:.*]] = cuf.register_module @cuda_device_mod -> !llvm.ptr +// CHECK-DAG: %[[VAR_NAME:.*]] = fir.address_of(@_QQ{{.*}}) : !fir.ref> +// CHECK-DAG: %[[VAR_ADDR:.*]] = fir.address_of(@_QMmtestsEn) : !fir.ref> +// CHECK-DAG: %[[MODULE2:.*]] = fir.convert %[[MODULE]] : (!llvm.ptr) -> !fir.ref> +// CHECK-DAG: %[[VAR_ADDR2:.*]] = fir.convert %[[VAR_ADDR]] : (!fir.ref>) -> !fir.ref +// CHECK-DAG: %[[VAR_NAME2:.*]] = fir.convert %[[VAR_NAME]] : (!fir.ref>) -> !fir.ref +// CHECK-DAG: %[[CST:.*]] = arith.constant 20 : index +// CHECK-DAG %[[CST2:.*]] = fir.convert %[[CST]] : (index) -> i64 +// CHECK fir.call @_FortranACUFRegisterVariable(%[[MODULE2]], %[[VAR_ADDR2]], %[[VAR_NAME2]], %[[CST2]]) : (!fir.ref>, !fir.ref, !fir.ref, i64) -> none diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index c33c50115b9fc0..b99e09fb76468b 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -15,11 +15,9 @@ func.func @_QPsub1() { // CHECK-LABEL: func.func @_QPsub1() // CHECK: %[[ADEV:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eadev"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[AHOST:.*]]:2 = hlfir.declare %{{.*}} {fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Eahost"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) -// CHECK: %[[AHOST_LOAD:.*]] = fir.load %[[AHOST]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box -// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.box, i32, !fir.ref, i32) -> none +// CHECK: %[[AHOST_BOX:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> +// CHECK: fir.call @_FortranACUFDataTransferDescDesc(%[[AHOST_BOX]], %[[ADEV_BOX]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref>, i32, !fir.ref, i32) -> none func.func @_QPsub2() { %0 = cuf.alloc !fir.box>> {bindc_name = "adev", data_attr = #cuf.cuda, uniq_name = "_QFsub2Eadev"} -> !fir.ref>>> @@ -76,19 +74,17 @@ func.func @_QPsub4() { // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.constant 10 : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#0 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { %0 = fir.dummy_scope : !fir.dscope @@ -122,19 +118,17 @@ func.func @_QPsub5(%arg0: !fir.ref {fir.bindc_name = "n"}) { // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.box, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferDescPtr(%[[ADEV_BOX]], %[[AHOST_PTR]], %[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.llvm_ptr, i64, i32, !fir.ref, i32) -> none // CHECK: %[[NBELEM:.*]] = arith.muli %[[I1]], %[[I2]] : index // CHECK: %[[WIDTH:.*]] = arith.constant 4 : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %[[WIDTH]] : index -// CHECK: %[[ADEV_LOAD:.*]] = fir.load %[[ADEV]]#0 : !fir.ref>>> // CHECK: %[[AHOST_PTR:.*]] = fir.convert %[[AHOST]]#1 : (!fir.ref>) -> !fir.llvm_ptr -// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV_LOAD]] : (!fir.box>>) -> !fir.box +// CHECK: %[[ADEV_BOX:.*]] = fir.convert %[[ADEV]]#0 : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.box, i64, i32, !fir.ref, i32) -> none +// CHECK: fir.call @_FortranACUFDataTransferPtrDesc(%[[AHOST_PTR]], %[[ADEV_BOX]], %[[BYTES_CONV]], %c1{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr, !fir.ref>, i64, i32, !fir.ref, i32) -> none func.func @_QPsub6() { %0 = cuf.alloc i32 {bindc_name = "idev", data_attr = #cuf.cuda, uniq_name = "_QFsub6Eidev"} -> !fir.ref diff --git a/flang/test/Fir/CUDA/cuda-launch.fir b/flang/test/Fir/CUDA/cuda-launch.fir new file mode 100644 index 00000000000000..f11bcbdb7fce55 --- /dev/null +++ b/flang/test/Fir/CUDA/cuda-launch.fir @@ -0,0 +1,64 @@ +// RUN: fir-opt --split-input-file --cuf-convert %s | FileCheck %s + + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + gpu.func @_QPsub_device1() kernel { + cf.br ^bb1 + ^bb1: // pred: ^bb0 + gpu.return + } + gpu.func @_QPsub_device2(%arg0: !fir.ref) kernel { + cf.br ^bb1(%arg0 : !fir.ref) + ^bb1(%0: !fir.ref): // pred: ^bb0 + %1 = fir.declare %0 {uniq_name = "_QFsub1Ei"} : (!fir.ref) -> !fir.ref + %cst = arith.constant 2.000000e+00 : f32 + fir.store %cst to %1 : !fir.ref + gpu.return + } + } + + func.func @_QQmain() attributes {fir.bindc_name = "main"} { + %0 = fir.alloca f32 + // CHECK: %[[ALLOCA:.*]] = fir.alloca f32 + %c1 = arith.constant 1 : index + %c11_i32 = arith.constant 11 : i32 + %c6_i32 = arith.constant 6 : i32 + %c1_i32 = arith.constant 1 : i32 + // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}} + cuf.kernel_launch @cuda_device_mod::@_QPsub_device1<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>() + + // CHECK: gpu.launch_func @cuda_device_mod::@_QPsub_device2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) dynamic_shared_memory_size %c0{{.*}} args(%[[ALLOCA]] : !fir.ref) + cuf.kernel_launch @cuda_device_mod::@_QPsub_device2<<<%c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32, %c1_i32>>>(%0) : (!fir.ref) + return + } + +} + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + gpu.func @_QMmod1Psub1(%arg0: !fir.ref>) kernel { + gpu.return + } + } + + func.func @_QMmod1Psub1(%arg0: !fir.ref> {cuf.data_attr = #cuf.cuda, fir.bindc_name = "adev"}) attributes {cuf.cluster_dims = #cuf.cluster_dims, cuf.proc_attr = #cuf.cuda_proc} { + return + } + func.func @_QMmod1Phost_sub() { + %c10 = arith.constant 10 : index + %0 = cuf.alloc !fir.array<10xi32> {bindc_name = "adev", data_attr = #cuf.cuda, uniq_name = "_QMmod1Fhost_subEadev"} -> !fir.ref> + %1 = fir.shape %c10 : (index) -> !fir.shape<1> + %2:2 = hlfir.declare %0(%1) {data_attr = #cuf.cuda, uniq_name = "_QMmod1Fhost_subEadev"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) + %c1_i32 = arith.constant 1 : i32 + %c10_i32 = arith.constant 10 : i32 + cuf.kernel_launch @_QMmod1Psub1<<<%c1_i32, %c1_i32, %c1_i32, %c10_i32, %c1_i32, %c1_i32>>>(%2#1) : (!fir.ref>) + return + } +} + +// CHECK-LABEL: func.func @_QMmod1Phost_sub() +// CHECK: gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 clusters in (%c2{{.*}}, %c2{{.*}}, %c1{{.*}}) + diff --git a/flang/test/Fir/box.fir b/flang/test/Fir/box.fir index 81a4d8bc13bf01..fd9fa1f2b3aabd 100644 --- a/flang/test/Fir/box.fir +++ b/flang/test/Fir/box.fir @@ -56,12 +56,14 @@ func.func @fa(%a : !fir.ref>) { // CHECK-LABEL: define void @b1( // CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]]) func.func @b1(%arg0 : !fir.ref>, %arg1 : index) -> !fir.box> { + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 } // CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} undef, i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %x = fir.embox %arg0 typeparams %arg1 : (!fir.ref>, index) -> !fir.box> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 24, i1 false) return %x : !fir.box> } @@ -71,11 +73,13 @@ func.func @b1(%arg0 : !fir.ref>, %arg1 : index) -> !fir.box>>, %arg1 : index) -> !fir.box>> { %1 = fir.shape %arg1 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: insertvalue {{.*}} { ptr undef, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), i32 20240719, i8 1, i8 40, i8 0, i8 0, {{.*}} }, i64 %[[arg1]], 7, 0, 1 // CHECK: insertvalue {{.*}} %{{.*}}, i64 ptrtoint (ptr getelementptr ([5 x i8], ptr null, i32 1) to i64), 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %2 = fir.embox %arg0(%1) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %2 : !fir.box>> } @@ -84,6 +88,7 @@ func.func @b2(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK-SAME: ptr %[[res:.*]], ptr %[[arg0:.*]], i64 %[[arg1:.*]], i64 %[[arg2:.*]]) func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %arg2 : index) -> !fir.box>> { %1 = fir.shape %arg2 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 @@ -91,7 +96,8 @@ func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %ar // CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %2 = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %2 : !fir.box>> } @@ -101,6 +107,7 @@ func.func @b3(%arg0 : !fir.ref>>, %arg1 : index, %ar func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> !fir.box>> { %c_7 = arith.constant 7 : index %1 = fir.shape %c_7 : (index) -> !fir.shape<1> + // CHECK: %[[alloca:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } // CHECK: %[[size:.*]] = mul i64 ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64), %[[arg1]] // CHECK: insertvalue {{.*}} i64 %[[size]], 1 // CHECK: insertvalue {{.*}} i32 20240719, 2 @@ -108,7 +115,8 @@ func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK: insertvalue {{.*}} i64 %[[size]], 7, 0, 2 // CHECK: insertvalue {{.*}} ptr %[[arg0]], 0 %x = fir.embox %arg0(%1) typeparams %arg1 : (!fir.ref>>, !fir.shape<1>, index) -> !fir.box>> - // CHECK: store {{.*}}, ptr %[[res]] + // CHECK: store {{.*}}, ptr %[[alloca]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[res]], ptr %[[alloca]], i32 48, i1 false) return %x : !fir.box>> } @@ -117,8 +125,7 @@ func.func @b4(%arg0 : !fir.ref>>, %arg1 : index) -> // CHECK-SAME: ptr %[[arg0:.*]], ptr %[[arg1:.*]]) func.func @b5(%arg0 : !fir.ref>>>, %arg1 : !fir.box>>) { fir.store %arg1 to %arg0 : !fir.ref>>> - // CHECK: %[[boxLoad:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] }, ptr %[[arg1]] - // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [2 x [3 x i64]] } %[[boxLoad]], ptr %[[arg0]] + // CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %0, ptr %1, i32 72, i1 false) return } diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 335877e7c9a872..168526518865b4 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -799,8 +799,8 @@ func.func @_QPs(%arg0: !fir.ref> {fir.bindc_name = "x"}) { //CHECK: omp.parallel { //CHECK: %[[CONST_1:.*]] = llvm.mlir.constant(1 : i32) : i32 //CHECK: %[[ALLOCA_1:.*]] = llvm.alloca %[[CONST_1:.*]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {alignment = 8 : i64} : (i32) -> !llvm.ptr -//CHECK: %[[LOAD:.*]] = llvm.load %[[ALLOCA]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> -//CHECK: llvm.store %[[LOAD]], %[[ALLOCA_1]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>, !llvm.ptr +//CHECK: %[[SIZE:.*]] = llvm.mlir.constant(24 : i32) : i32 +//CHECK: "llvm.intr.memcpy"(%[[ALLOCA_1]], %[[ALLOCA]], %[[SIZE]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () //CHECK: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA_1]][0, 0] : (!llvm.ptr) -> !llvm.ptr //CHECK: %[[LOAD_2:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> !llvm.ptr //CHECK: omp.terminator diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 1182a0a10f218b..fa391fa6cc7a7d 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -862,8 +862,8 @@ func.func @test_store_box(%array : !fir.ref>>, %box // CHECK-LABEL: llvm.func @test_store_box // CHECK-SAME: (%[[arg0:.*]]: !llvm.ptr, // CHECK-SAME: %[[arg1:.*]]: !llvm.ptr) { -// CHECK-NEXT: %[[box_to_store:.*]] = llvm.load %arg1 : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)> -// CHECK-NEXT: llvm.store %[[box_to_store]], %[[arg0]] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i{{.*}}>>)>, !llvm.ptr +// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK-NEXT: "llvm.intr.memcpy"(%[[arg0]], %[[arg1]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } @@ -875,15 +875,17 @@ func.func @store_unlimited_polymorphic_box(%arg0 : !fir.class, %arg1 : !fi fir.store %arg3 to %arg3r : !fir.ref>> return } -// CHECK-LABEL: llvm.func @store_unlimited_polymorphic_box( -// CHECK: %[[VAL_8:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> -// CHECK: llvm.store %[[VAL_8]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_9:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)> -// CHECK: llvm.store %[[VAL_9]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_10:.*]] = llvm.load %{{.*}} : !llvm.ptr -> !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)> -// CHECK: llvm.store %[[VAL_10]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, ptr, array<1 x i{{.*}}>)>, !llvm.ptr -// CHECK: %[[VAL_11:.*]] = llvm.load %{{.*}}: !llvm.ptr -// CHECK: llvm.store %[[VAL_11]], %{{.*}} : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i{{.*}}>>, ptr, array<1 x i{{.*}}>)>, !llvm.ptr +// CHECK: llvm.func @store_unlimited_polymorphic_box(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: !llvm.ptr, %[[VAL_2:.*]]: !llvm.ptr, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: !llvm.ptr, %[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr) { +// CHECK: %[[VAL_8:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_4]], %[[VAL_0]], %[[VAL_8]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_5]], %[[VAL_1]], %[[VAL_9]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_6]], %[[VAL_2]], %[[VAL_10]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_7]], %[[VAL_3]], %[[VAL_11]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: llvm.return +// CHECK: } // ----- @@ -935,8 +937,8 @@ func.func @test_load_box(%addr : !fir.ref>>) { // GENERIC-NEXT: %[[box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])> // AMDGPU-NEXT: %[[alloca_box_copy:.*]] = llvm.alloca %[[c1]] x !llvm.struct<([[DESC_TYPE:.*]])>{{.*}} : (i32) -> !llvm.ptr<5> // AMDGPU-NEXT: %[[box_copy:.*]] = llvm.addrspacecast %[[alloca_box_copy]] : !llvm.ptr<5> to !llvm.ptr -// CHECK-NEXT: %[[box_val:.*]] = llvm.load %[[arg0]] : !llvm.ptr -> !llvm.struct<([[DESC_TYPE]])> -// CHECK-NEXT: llvm.store %[[box_val]], %[[box_copy]] : !llvm.struct<([[DESC_TYPE]])>, !llvm.ptr +// CHECK-NEXT: %[[size:.*]] = llvm.mlir.constant(48 : i32) : i32 +// CHECK-NEXT: "llvm.intr.memcpy"(%[[box_copy]], %[[arg0]], %[[size]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK-NEXT: llvm.call @takes_box(%[[box_copy]]) : (!llvm.ptr) -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } diff --git a/flang/test/Fir/embox-char.fir b/flang/test/Fir/embox-char.fir index bf8344dbb60fc8..efb069f96520d4 100644 --- a/flang/test/Fir/embox-char.fir +++ b/flang/test/Fir/embox-char.fir @@ -1,3 +1,10 @@ +// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +// The script is designed to make adding checks to +// a test case fast, it is *not* designed to be authoritative +// about what constitutes a good test! The CHECK should be +// minimized and named to reflect the test intent. + // Test that the offset of the first element of the slice // is computed in elements of the type used for the GEP // computing the base of the slice. @@ -10,42 +17,40 @@ // print *, x(2,:) // end subroutine -// CHECK-LABEL: llvm.func @test_char4( -// CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, -// CHECK-SAME: %[[VAL_1_SLICE_LB0:.*]]: i64, %[[VAL_2_SLICE_EX0:.*]]: i64, %[[VAL_3_SLICE_ST0:.*]]: i64, %[[VAL_4_SLICE_LB1:.*]]: i64, %[[VAL_5_SLICE_EX1:.*]]: i64, %[[VAL_6_SLICE_ST1:.*]]: i64) { +// CHECK: llvm.func @test_char4(%[[VAL_0:.*]]: !llvm.ptr, %[[VAL_1:.*]]: i64, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i64, %[[VAL_5:.*]]: i64, %[[VAL_6:.*]]: i64) { // CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[VAL_13_WIDTH:.*]] = llvm.mlir.constant(4 : index) : i64 -// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_0]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_14]], %[[VAL_10]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(4 : index) : i64 +// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_10]], %[[VAL_0]], %[[VAL_14]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () // CHECK: %[[VAL_15:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_16_BYTESIZE:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64 // CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_18_LB1:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_18:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 // CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_20_EX1:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_20:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 // CHECK: %[[VAL_21:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_22_ST1:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_22:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 // CHECK: %[[VAL_23:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_24_BASEPTR:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr +// CHECK: %[[VAL_24:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr // CHECK: %[[VAL_25:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_26_LB0:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_26:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 // CHECK: %[[VAL_27:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_28_EX0:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_28:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 // CHECK: %[[VAL_29:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_30_ST0:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_31_LEN:.*]] = llvm.sdiv %[[VAL_16_BYTESIZE]], %[[VAL_13_WIDTH]] : i64 +// CHECK: %[[VAL_30:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_31:.*]] = llvm.sdiv %[[VAL_16]], %[[VAL_13]] : i64 // CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(44 : i32) : i32 // CHECK: %[[VAL_33:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]][1] : (!llvm.ptr) -> !llvm.ptr, i32 // CHECK: %[[VAL_35:.*]] = llvm.ptrtoint %[[VAL_34]] : !llvm.ptr to i64 -// CHECK: %[[VAL_36_BYTESIZE:.*]] = llvm.mul %[[VAL_35]], %[[VAL_31_LEN]] : i64 +// CHECK: %[[VAL_36:.*]] = llvm.mul %[[VAL_35]], %[[VAL_31]] : i64 // CHECK: %[[VAL_37:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36_BYTESIZE]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(20240719 : i32) : i32 // CHECK: %[[VAL_40:.*]] = llvm.insertvalue %[[VAL_39]], %[[VAL_38]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_41:.*]] = llvm.mlir.constant(2 : i32) : i32 @@ -59,39 +64,39 @@ // CHECK: %[[VAL_49:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[VAL_50:.*]] = llvm.trunc %[[VAL_49]] : i32 to i8 // CHECK: %[[VAL_51:.*]] = llvm.insertvalue %[[VAL_50]], %[[VAL_48]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_52_c0:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_52:.*]] = llvm.mlir.constant(0 : i64) : i64 // CHECK: %[[VAL_53:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1_SLICE_LB0]], %[[VAL_26_LB0]] : i64 -// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_31_LEN]] : i64 -// CHECK: %[[VAL_56_SLICE_OFF0:.*]] = llvm.add %[[VAL_55]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2_SLICE_EX0]], %[[VAL_1_SLICE_LB0]] : i64 -// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52_c0]] : i1, i64 +// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1]], %[[VAL_26]] : i64 +// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_31]] : i64 +// CHECK: %[[VAL_56:.*]] = llvm.add %[[VAL_55]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52]] : i1, i64 // CHECK: %[[VAL_62:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_51]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_61]], %[[VAL_62]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_3_SLICE_ST0]] : i64 +// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36]], %[[VAL_3]] : i64 // CHECK: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_31_LEN]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4_SLICE_LB1]], %[[VAL_18_LB1]] : i64 -// CHECK: %[[VAL_69_SLICE_OFF1:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 -// CHECK: %[[VAL_70_OFFSET:.*]] = llvm.add %[[VAL_69_SLICE_OFF1]], %[[VAL_56_SLICE_OFF0]] : i64 -// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5_SLICE_EX1]], %[[VAL_4_SLICE_LB1]] : i64 -// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" %[[VAL_73]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52_c0]] : i1, i64 +// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36]], %[[VAL_28]] : i64 +// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_31]], %[[VAL_28]] : i64 +// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4]], %[[VAL_18]] : i64 +// CHECK: %[[VAL_69:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 +// CHECK: %[[VAL_70:.*]] = llvm.add %[[VAL_69]], %[[VAL_56]] : i64 +// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" %[[VAL_73]], %[[VAL_52]] : i64 +// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52]] : i1, i64 // CHECK: %[[VAL_76:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_65]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_75]], %[[VAL_76]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6_SLICE_ST1]] : i64 +// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6]] : i64 // CHECK: %[[VAL_79:.*]] = llvm.insertvalue %[[VAL_78]], %[[VAL_77]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24_BASEPTR]]{{\[}}%[[VAL_70_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32 -// CHECK: %[[VAL_84:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_84]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20]] : i64 +// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24]]{{\[}}%[[VAL_70]]] : (!llvm.ptr, i64) -> !llvm.ptr, i32 +// CHECK: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_83]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr // CHECK: llvm.return // CHECK: } func.func @test_char4(%arg0: !fir.ref>>>>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) { @@ -108,86 +113,84 @@ func.func @test_char4(%arg0: !fir.ref>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_10:.*]] = llvm.alloca %[[VAL_9]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(0 : index) : i64 -// CHECK: %[[VAL_12_c1:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: %[[VAL_14:.*]] = llvm.load %[[VAL_0]] : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_14]], %[[VAL_10]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr -// CHECK: %[[VAL_15:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_16_BYTESIZE:.*]] = llvm.load %[[VAL_15]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_17:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_18_LB1:.*]] = llvm.load %[[VAL_17]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_19:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_20_EX1:.*]] = llvm.load %[[VAL_19]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_21:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_22_ST1:.*]] = llvm.load %[[VAL_21]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_23:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_24_BASEPTR:.*]] = llvm.load %[[VAL_23]] : !llvm.ptr -> !llvm.ptr -// CHECK: %[[VAL_25:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_26_LB0:.*]] = llvm.load %[[VAL_25]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_27:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_28_EX0:.*]] = llvm.load %[[VAL_27]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_29:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_30_ST0:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 -// CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(40 : i32) : i32 -// CHECK: %[[VAL_33:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]][1] : (!llvm.ptr) -> !llvm.ptr, i8 -// CHECK: %[[VAL_35:.*]] = llvm.ptrtoint %[[VAL_34]] : !llvm.ptr to i64 -// CHECK: %[[VAL_36_BYTESIZE:.*]] = llvm.mul %[[VAL_35]], %[[VAL_16_BYTESIZE]] : i64 -// CHECK: %[[VAL_37:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36_BYTESIZE]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(20240719 : i32) : i32 -// CHECK: %[[VAL_40:.*]] = llvm.insertvalue %[[VAL_39]], %[[VAL_38]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_41:.*]] = llvm.mlir.constant(2 : i32) : i32 -// CHECK: %[[VAL_42:.*]] = llvm.trunc %[[VAL_41]] : i32 to i8 -// CHECK: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_42]], %[[VAL_40]][3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_44:.*]] = llvm.trunc %[[VAL_32]] : i32 to i8 -// CHECK: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_46:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_47:.*]] = llvm.trunc %[[VAL_46]] : i32 to i8 -// CHECK: %[[VAL_48:.*]] = llvm.insertvalue %[[VAL_47]], %[[VAL_45]][5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_49:.*]] = llvm.mlir.constant(0 : i32) : i32 -// CHECK: %[[VAL_50:.*]] = llvm.trunc %[[VAL_49]] : i32 to i8 -// CHECK: %[[VAL_51:.*]] = llvm.insertvalue %[[VAL_50]], %[[VAL_48]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_52_c0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[VAL_53:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_54:.*]] = llvm.sub %[[VAL_1_SLICE_LB0]], %[[VAL_26_LB0]] : i64 -// CHECK: %[[VAL_55:.*]] = llvm.mul %[[VAL_54]], %[[VAL_16_BYTESIZE]] : i64 -// CHECK: %[[VAL_56_SLICE_OFF0:.*]] = llvm.add %[[VAL_55]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_57:.*]] = llvm.sub %[[VAL_2_SLICE_EX0]], %[[VAL_1_SLICE_LB0]] : i64 -// CHECK: %[[VAL_58:.*]] = llvm.add %[[VAL_57]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_59:.*]] = llvm.sdiv %[[VAL_58]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_60:.*]] = llvm.icmp "sgt" %[[VAL_59]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_61:.*]] = llvm.select %[[VAL_60]], %[[VAL_59]], %[[VAL_52_c0]] : i1, i64 -// CHECK: %[[VAL_62:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_51]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_61]], %[[VAL_62]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_3_SLICE_ST0]] : i64 -// CHECK: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_66:.*]] = llvm.mul %[[VAL_36_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_16_BYTESIZE]], %[[VAL_28_EX0]] : i64 -// CHECK: %[[VAL_68:.*]] = llvm.sub %[[VAL_4_SLICE_LB1]], %[[VAL_18_LB1]] : i64 -// CHECK: %[[VAL_69_SLICE_OFF1:.*]] = llvm.mul %[[VAL_68]], %[[VAL_67]] : i64 -// CHECK: %[[VAL_70_OFFSET:.*]] = llvm.add %[[VAL_69_SLICE_OFF1]], %[[VAL_56_SLICE_OFF0]] : i64 -// CHECK: %[[VAL_71:.*]] = llvm.sub %[[VAL_5_SLICE_EX1]], %[[VAL_4_SLICE_LB1]] : i64 -// CHECK: %[[VAL_72:.*]] = llvm.add %[[VAL_71]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_73:.*]] = llvm.sdiv %[[VAL_72]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_74:.*]] = llvm.icmp "sgt" %[[VAL_73]], %[[VAL_52_c0]] : i64 -// CHECK: %[[VAL_75:.*]] = llvm.select %[[VAL_74]], %[[VAL_73]], %[[VAL_52_c0]] : i1, i64 -// CHECK: %[[VAL_76:.*]] = llvm.insertvalue %[[VAL_53]], %[[VAL_65]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_75]], %[[VAL_76]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_66]], %[[VAL_6_SLICE_ST1]] : i64 -// CHECK: %[[VAL_79:.*]] = llvm.insertvalue %[[VAL_78]], %[[VAL_77]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: %[[VAL_80:.*]] = llvm.mul %[[VAL_66]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_81:.*]] = llvm.mul %[[VAL_67]], %[[VAL_20_EX1]] : i64 -// CHECK: %[[VAL_82:.*]] = llvm.getelementptr %[[VAL_24_BASEPTR]]{{\[}}%[[VAL_70_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 -// CHECK: %[[VAL_84:.*]] = llvm.insertvalue %[[VAL_82]], %[[VAL_79]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> -// CHECK: llvm.store %[[VAL_84]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr +// CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(72 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_10]], %[[VAL_0]], %[[VAL_13]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i32) -> () +// CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_10]][0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_15:.*]] = llvm.load %[[VAL_14]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_16:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_17:.*]] = llvm.load %[[VAL_16]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_18:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_19:.*]] = llvm.load %[[VAL_18]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_20:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_12]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_21:.*]] = llvm.load %[[VAL_20]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_22:.*]] = llvm.getelementptr %[[VAL_10]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_23:.*]] = llvm.load %[[VAL_22]] : !llvm.ptr -> !llvm.ptr +// CHECK: %[[VAL_24:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_25:.*]] = llvm.load %[[VAL_24]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_26:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_27:.*]] = llvm.load %[[VAL_26]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_28:.*]] = llvm.getelementptr %[[VAL_10]][0, 7, %[[VAL_11]], 2] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_29:.*]] = llvm.load %[[VAL_28]] : !llvm.ptr -> i64 +// CHECK: %[[VAL_30:.*]] = llvm.mlir.constant(40 : i32) : i32 +// CHECK: %[[VAL_31:.*]] = llvm.mlir.zero : !llvm.ptr +// CHECK: %[[VAL_32:.*]] = llvm.getelementptr %[[VAL_31]][1] : (!llvm.ptr) -> !llvm.ptr, i8 +// CHECK: %[[VAL_33:.*]] = llvm.ptrtoint %[[VAL_32]] : !llvm.ptr to i64 +// CHECK: %[[VAL_34:.*]] = llvm.mul %[[VAL_33]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_35:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_36:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_35]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_37:.*]] = llvm.mlir.constant(20240719 : i32) : i32 +// CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_37]], %[[VAL_36]][2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(2 : i32) : i32 +// CHECK: %[[VAL_40:.*]] = llvm.trunc %[[VAL_39]] : i32 to i8 +// CHECK: %[[VAL_41:.*]] = llvm.insertvalue %[[VAL_40]], %[[VAL_38]][3] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_42:.*]] = llvm.trunc %[[VAL_30]] : i32 to i8 +// CHECK: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_42]], %[[VAL_41]][4] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_44:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[VAL_45:.*]] = llvm.trunc %[[VAL_44]] : i32 to i8 +// CHECK: %[[VAL_46:.*]] = llvm.insertvalue %[[VAL_45]], %[[VAL_43]][5] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_47:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[VAL_48:.*]] = llvm.trunc %[[VAL_47]] : i32 to i8 +// CHECK: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_46]][6] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_50:.*]] = llvm.mlir.constant(0 : i64) : i64 +// CHECK: %[[VAL_51:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_52:.*]] = llvm.sub %[[VAL_1]], %[[VAL_25]] : i64 +// CHECK: %[[VAL_53:.*]] = llvm.mul %[[VAL_52]], %[[VAL_15]] : i64 +// CHECK: %[[VAL_54:.*]] = llvm.add %[[VAL_53]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_55:.*]] = llvm.sub %[[VAL_2]], %[[VAL_1]] : i64 +// CHECK: %[[VAL_56:.*]] = llvm.add %[[VAL_55]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_57:.*]] = llvm.sdiv %[[VAL_56]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_58:.*]] = llvm.icmp "sgt" %[[VAL_57]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_59:.*]] = llvm.select %[[VAL_58]], %[[VAL_57]], %[[VAL_50]] : i1, i64 +// CHECK: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_51]], %[[VAL_49]][7, 0, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_59]], %[[VAL_60]][7, 0, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_62:.*]] = llvm.mul %[[VAL_34]], %[[VAL_3]] : i64 +// CHECK: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_64:.*]] = llvm.mul %[[VAL_34]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_65:.*]] = llvm.mul %[[VAL_15]], %[[VAL_27]] : i64 +// CHECK: %[[VAL_66:.*]] = llvm.sub %[[VAL_4]], %[[VAL_17]] : i64 +// CHECK: %[[VAL_67:.*]] = llvm.mul %[[VAL_66]], %[[VAL_65]] : i64 +// CHECK: %[[VAL_68:.*]] = llvm.add %[[VAL_67]], %[[VAL_54]] : i64 +// CHECK: %[[VAL_69:.*]] = llvm.sub %[[VAL_5]], %[[VAL_4]] : i64 +// CHECK: %[[VAL_70:.*]] = llvm.add %[[VAL_69]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_71:.*]] = llvm.sdiv %[[VAL_70]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_72:.*]] = llvm.icmp "sgt" %[[VAL_71]], %[[VAL_50]] : i64 +// CHECK: %[[VAL_73:.*]] = llvm.select %[[VAL_72]], %[[VAL_71]], %[[VAL_50]] : i1, i64 +// CHECK: %[[VAL_74:.*]] = llvm.insertvalue %[[VAL_51]], %[[VAL_63]][7, 1, 0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_73]], %[[VAL_74]][7, 1, 1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_76:.*]] = llvm.mul %[[VAL_64]], %[[VAL_6]] : i64 +// CHECK: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][7, 1, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: %[[VAL_78:.*]] = llvm.mul %[[VAL_64]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_79:.*]] = llvm.mul %[[VAL_65]], %[[VAL_19]] : i64 +// CHECK: %[[VAL_80:.*]] = llvm.getelementptr %[[VAL_23]]{{\[}}%[[VAL_68]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 +// CHECK: %[[VAL_81:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_77]][0] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> +// CHECK: llvm.store %[[VAL_81]], %[[VAL_8]] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>, !llvm.ptr // CHECK: llvm.return // CHECK: } func.func @test_char1(%arg0: !fir.ref>>>>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) { diff --git a/flang/test/Fir/polymorphic.fir b/flang/test/Fir/polymorphic.fir index 40204314e8df79..78e5b8dcf84c78 100644 --- a/flang/test/Fir/polymorphic.fir +++ b/flang/test/Fir/polymorphic.fir @@ -14,8 +14,7 @@ func.func @_QMpolymorphic_testPtest_allocate_unlimited_polymorphic_non_derived() // CHECK: %[[MEM:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } // CHECK: %[[DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 0, i32 20240719, i8 0, i8 -1, i8 1, i8 1, ptr null, [1 x i64] zeroinitializer }, ptr %[[MEM]] -// CHECK: %[[LOADED:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[MEM]], align 8 -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED]], ptr %[[DESC]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[DESC]], ptr %[[MEM]], i32 40, i1 false) // CHECK: ret void // CHECK: } @@ -66,8 +65,7 @@ func.func @_QMpolymorphic_testPtest_embox() { // CHECK-LABEL: @_QMpolymorphic_testPtest_embox() // CHECK: %[[ALLOCA_DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } { ptr @_QFEy, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20240719, i8 1, i8 9, {{.*}}, ptr %[[ALLOCA_DESC]] -// CHECK: %[[LOADED_DESC:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %[[ALLOCA_DESC]], align 8 -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[LOADED_DESC]], ptr @_QFEx, align 8 +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr @_QFEx, ptr %[[ALLOCA_DESC]], i32 64, i1 false) // Test emboxing of an array element from an unlimited polymorphic array. @@ -158,8 +156,7 @@ func.func @_QQmain() { // CHECK: %[[CLASS_NONE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } // CHECK: %[[DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr @_QMmod1Ea, i64 ptrtoint (ptr getelementptr (%_QMmod1TtK2, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 1, i8 1, ptr @_QMmod1EXdtXtX2, [1 x i64] zeroinitializer }, ptr %[[CLASS_NONE]], align 8 -// CHECK: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS_NONE]] -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[DESC]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[DESC]], ptr %[[CLASS_NONE]], i32 40, i1 false) // CHECK: call void @_QMmod1Psub1(ptr %[[DESC]]) fir.global @_QMmod2Ep : !fir.class> { @@ -180,8 +177,7 @@ func.func private @_FortranAPointerAssociate(!fir.ref>, !fir.box< // CHECK-LABEL: define void @_QMmod2Pinitp( // CHECK-SAME: ptr %[[ARG0:.*]]){{.*}}{ // CHECK: %[[ALLOCA_CLASS_NONE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } -// CHECK: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ARG0]] -// CHECK: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[ALLOCA_CLASS_NONE]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA_CLASS_NONE]], ptr %[[ARG0]], i32 40, i1 false) // CHECK: %{{.*}} = call {} @_FortranAPointerAssociate(ptr @_QMmod2Ep, ptr %[[ALLOCA_CLASS_NONE]]) // CHECK: ret void diff --git a/flang/test/Fir/tbaa.fir b/flang/test/Fir/tbaa.fir index 809ab3a922a0fe..401ebbc8c49fe6 100644 --- a/flang/test/Fir/tbaa.fir +++ b/flang/test/Fir/tbaa.fir @@ -137,8 +137,8 @@ module { // CHECK: %[[VAL_7:.*]] = llvm.mlir.addressof @_QFEx : !llvm.ptr // CHECK: %[[VAL_8:.*]] = llvm.mlir.addressof @_QQclX2E2F64756D6D792E66393000 : !llvm.ptr // CHECK: %[[VAL_10:.*]] = llvm.call @_FortranAioBeginExternalListOutput(%[[VAL_6]], %[[VAL_8]], %[[VAL_5]]) {fastmathFlags = #llvm.fastmath} : (i32, !llvm.ptr, i32) -> !llvm.ptr -// CHECK: %[[VAL_11:.*]] = llvm.load %[[VAL_7]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> -// CHECK: llvm.store %[[VAL_11]], %[[VAL_3]] {tbaa = [#[[$BOXT]]]} : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)>, !llvm.ptr +// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(64 : i32) : i32 +// CHECK: "llvm.intr.memcpy"(%[[VAL_3]], %[[VAL_7]], %[[VAL_11]]) <{isVolatile = false, tbaa = [#[[$BOXT]]]}> // CHECK: %[[VAL_12:.*]] = llvm.getelementptr %[[VAL_3]][0, 7, %[[VAL_4]], 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> // CHECK: %[[VAL_13:.*]] = llvm.load %[[VAL_12]] {tbaa = [#[[$BOXT]]]} : !llvm.ptr -> i64 // CHECK: %[[VAL_14:.*]] = llvm.getelementptr %[[VAL_3]][0, 7, %[[VAL_4]], 1] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90 index 62d0a3faf0c593..63ac6fbe05ee0e 100644 --- a/flang/test/Integration/OpenMP/private-global.f90 +++ b/flang/test/Integration/OpenMP/private-global.f90 @@ -31,8 +31,9 @@ program bug ! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 ! CHECK: %[[TABLE_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } { ptr undef, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64), i32 20240719, i8 1, i8 9, i8 0, i8 0, [1 x [3 x i64]] {{\[\[}}3 x i64] [i64 1, i64 10, i64 ptrtoint (ptr getelementptr (i32, ptr null, i32 1) to i64)]] }, ptr %[[PRIV_TABLE]], 0 ! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL]], ptr %[[TABLE_BOX_ADDR]], align 8 -! CHECK: %[[TABLE_BOX_VAL2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[TABLE_BOX_ADDR]], align 8 -! CHECK: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL2]], ptr %[[TABLE_BOX_ADDR2]], align 8 +! CHECK : %[[TABLE_BOX_VAL2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[TABLE_BOX_ADDR]], align 8 +! CHECK : store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[TABLE_BOX_VAL2]], ptr %[[TABLE_BOX_ADDR2]], align 8 +! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[TABLE_BOX_ADDR]], i32 48, i1 false) ! CHECK: %[[VAL_26:.*]] = call {} @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9) ! ... ! check that we use the private copy of table for table/=50 diff --git a/flang/test/Lower/Intrinsics/ieee_real.f90 b/flang/test/Lower/Intrinsics/ieee_real.f90 new file mode 100644 index 00000000000000..20b7441e6e3a2a --- /dev/null +++ b/flang/test/Lower/Intrinsics/ieee_real.f90 @@ -0,0 +1,217 @@ +! RUN: bbc -emit-hlfir -o - %s | FileCheck %s + +! CHECK-LABEL: c.func @_QQmain +program p + use ieee_arithmetic, only: ieee_real + + ! CHECK: %[[V_0:[0-9]+]] = fir.alloca i16 {bindc_name = "j2", uniq_name = "_QFEj2"} + ! CHECK: %[[V_1:[0-9]+]]:2 = hlfir.declare %[[V_0]] {uniq_name = "_QFEj2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[V_2:[0-9]+]] = fir.alloca i64 {bindc_name = "j8", uniq_name = "_QFEj8"} + ! CHECK: %[[V_3:[0-9]+]]:2 = hlfir.declare %[[V_2]] {uniq_name = "_QFEj8"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[V_4:[0-9]+]] = fir.alloca f16 {bindc_name = "x2", uniq_name = "_QFEx2"} + ! CHECK: %[[V_5:[0-9]+]]:2 = hlfir.declare %[[V_4]] {uniq_name = "_QFEx2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[V_6:[0-9]+]] = fir.alloca f32 {bindc_name = "x4", uniq_name = "_QFEx4"} + ! CHECK: %[[V_7:[0-9]+]]:2 = hlfir.declare %[[V_6]] {uniq_name = "_QFEx4"} : (!fir.ref) -> (!fir.ref, !fir.ref) + ! CHECK: %[[V_8:[0-9]+]] = fir.alloca f64 {bindc_name = "x8", uniq_name = "_QFEx8"} + ! CHECK: %[[V_9:[0-9]+]]:2 = hlfir.declare %[[V_8]] {uniq_name = "_QFEx8"} : (!fir.ref) -> (!fir.ref, !fir.ref) + integer(2) :: j2 + integer(8) :: j8 + real(2) :: x2 + real(4) :: x4 + real(8) :: x8 + + ! CHECK: hlfir.assign %c-32768{{.*}} to %[[V_1]]#0 : i16, !fir.ref + j2 = -huge(j2) - 1 + + ! CHECK: %[[V_10:[0-9]+]] = fir.load %[[V_1]]#0 : !fir.ref + ! CHECK: %[[V_11:[0-9]+]] = fir.convert %[[V_10]] : (i16) -> f32 + ! CHECK: hlfir.assign %[[V_11]] to %[[V_7]]#0 : f32, !fir.ref + x4 = ieee_real(j2,4) ! exact +! print*, j2, ' -> ', x4 + + ! CHECK: hlfir.assign %c33{{.*}} to %[[V_3]]#0 : i64, !fir.ref + j8 = 33 + + ! CHECK: %[[V_12:[0-9]+]] = fir.load %[[V_3]]#0 : !fir.ref + ! CHECK: %[[V_13:[0-9]+]] = fir.convert %[[V_12]] : (i64) -> f32 + ! CHECK: %[[V_14:[0-9]+]] = fir.convert %[[V_13]] : (f32) -> i64 + ! CHECK: %[[V_15:[0-9]+]] = arith.cmpi eq, %[[V_12]], %[[V_14]] : i64 + ! CHECK: %[[V_16:[0-9]+]] = fir.if %[[V_15]] -> (f32) { + ! CHECK: fir.result %[[V_13]] : f32 + ! CHECK: } else { + ! CHECK: %[[V_27:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 + ! CHECK-DAG: %[[V_28:[0-9]+]] = arith.cmpi slt, %[[V_12]], %c0{{.*}} : i64 + ! CHECK-DAG: %[[V_29:[0-9]+]] = arith.cmpi sgt, %[[V_12]], %c0{{.*}} : i64 + ! CHECK-DAG: %[[V_30:[0-9]+]] = arith.bitcast %[[V_13]] : f32 to i32 + ! CHECK-DAG: %[[V_31:[0-9]+]] = arith.andi %[[V_30]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_32:[0-9]+]] = fir.convert %[[V_31]] : (i32) -> i1 + ! CHECK-DAG: %[[V_33:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c5{{.*}} : i32 + ! CHECK-DAG: %[[V_34:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_35:[0-9]+]] = arith.ori %[[V_34]], %[[V_33]] : i1 + ! CHECK-DAG: %[[V_36:[0-9]+]] = arith.andi %[[V_35]], %[[V_32]] : i1 + ! CHECK-DAG: %[[V_37:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c0{{.*}} : i32 + ! CHECK-DAG: %[[V_38:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c4{{.*}} : i32 + ! CHECK-DAG: %[[V_39:[0-9]+]] = arith.cmpi slt, %[[V_12]], %[[V_14]] : i64 + ! CHECK-DAG: %[[V_40:[0-9]+]] = arith.addi %[[V_30]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_41:[0-9]+]] = arith.subi %[[V_30]], %c1{{.*}} : i32 + ! CHECK: %[[V_42:[0-9]+]] = fir.if %[[V_39]] -> (f32) { + ! CHECK-DAG: %[[V_44:[0-9]+]] = arith.andi %[[V_37]], %[[V_29]] : i1 + ! CHECK-DAG: %[[V_45:[0-9]+]] = arith.andi %[[V_38]], %[[V_28]] : i1 + ! CHECK-DAG: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c3{{.*}} : i32 + ! CHECK-DAG: %[[V_47:[0-9]+]] = arith.ori %[[V_36]], %[[V_44]] : i1 + ! CHECK-DAG: %[[V_48:[0-9]+]] = arith.ori %[[V_47]], %[[V_45]] : i1 + ! CHECK-DAG: %[[V_49:[0-9]+]] = arith.ori %[[V_48]], %[[V_46]] : i1 + ! CHECK: %[[V_50:[0-9]+]] = fir.if %[[V_49]] -> (f32) { + ! CHECK: %[[V_51:[0-9]+]] = arith.select %[[V_28]], %[[V_40]], %[[V_41]] : i32 + ! CHECK: %[[V_52:[0-9]+]] = arith.bitcast %[[V_51]] : i32 to f32 + ! CHECK: fir.result %[[V_52]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_13]] : f32 + ! CHECK: } + ! CHECK: fir.result %[[V_50]] : f32 + ! CHECK: } else { + ! CHECK-DAG: %[[V_44:[0-9]+]] = arith.andi %[[V_37]], %[[V_28]] : i1 + ! CHECK-DAG: %[[V_45:[0-9]+]] = arith.andi %[[V_38]], %[[V_29]] : i1 + ! CHECK-DAG: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c2{{.*}} : i32 + ! CHECK-DAG: %[[V_47:[0-9]+]] = arith.ori %[[V_36]], %[[V_44]] : i1 + ! CHECK-DAG: %[[V_48:[0-9]+]] = arith.ori %[[V_47]], %[[V_45]] : i1 + ! CHECK-DAG: %[[V_49:[0-9]+]] = arith.ori %[[V_48]], %[[V_46]] : i1 + ! CHECK: %[[V_50:[0-9]+]] = fir.if %[[V_49]] -> (f32) { + ! CHECK: %[[V_51:[0-9]+]] = arith.select %[[V_29]], %[[V_40]], %[[V_41]] : i32 + ! CHECK: %[[V_52:[0-9]+]] = arith.bitcast %[[V_51]] : i32 to f32 + ! CHECK: fir.result %[[V_52]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_13]] : f32 + ! CHECK: } + ! CHECK: fir.result %[[V_50]] : f32 + ! CHECK: } + ! CHECK: %[[V_43:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_42]]) <{bit = 516 : i32}> : (f32) -> i1 + ! CHECK: fir.if %[[V_43]] { + ! CHECK: %[[V_44:[0-9]+]] = fir.call @_FortranAMapException(%c40{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_45:[0-9]+]] = fir.call @feraiseexcept(%[[V_44]]) fastmath : (i32) -> i32 + ! CHECK: } else { + ! CHECK: %[[V_44:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_42]]) <{bit = 240 : i32}> : (f32) -> i1 + ! CHECK: fir.if %[[V_44]] { + ! CHECK: %[[V_45:[0-9]+]] = fir.call @_FortranAMapException(%c48{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_46:[0-9]+]] = fir.call @feraiseexcept(%[[V_45]]) fastmath : (i32) -> i32 + ! CHECK: } else { + ! CHECK: %[[V_45:[0-9]+]] = fir.call @_FortranAMapException(%c32{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_46:[0-9]+]] = fir.call @feraiseexcept(%[[V_45]]) fastmath : (i32) -> i32 + ! CHECK: } + ! CHECK: } + ! CHECK: fir.result %[[V_42]] : f32 + ! CHECK: } + ! CHECK: hlfir.assign %[[V_16]] to %[[V_7]]#0 : f32, !fir.ref + x4 = ieee_real(j8,4) +! print*, j8, ' -> ', x4 + + ! CHECK: hlfir.assign %cst{{[_0-9]*}} to %[[V_5]]#0 : f16, !fir.ref + x2 = 3.33 + + ! CHECK: %[[V_17:[0-9]+]] = fir.load %[[V_5]]#0 : !fir.ref + ! CHECK: %[[V_18:[0-9]+]] = fir.convert %[[V_17]] : (f16) -> f32 + ! CHECK: %[[V_19:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_18]]) <{bit = 1 : i32}> : (f32) -> i1 + ! CHECK: %[[V_20:[0-9]+]] = fir.if %[[V_19]] -> (f32) { + ! CHECK: %[[V_27:[0-9]+]] = fir.call @_FortranAMapException(%c1{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_28:[0-9]+]] = fir.call @feraiseexcept(%[[V_27]]) fastmath : (i32) -> i32 + ! CHECK: %[[V_29:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_4) : !fir.ref> + ! CHECK: %[[V_30:[0-9]+]] = fir.coordinate_of %[[V_29]], %c2{{.*}} : (!fir.ref>, i8) -> !fir.ref + ! CHECK: %[[V_31:[0-9]+]] = fir.load %[[V_30]] : !fir.ref + ! CHECK: %[[V_32:[0-9]+]] = arith.bitcast %[[V_31]] : i32 to f32 + ! CHECK: fir.result %[[V_32]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_18]] : f32 + ! CHECK: } + ! CHECK: %[[V_21:[0-9]+]] = fir.convert %[[V_20]] : (f32) -> f16 + ! CHECK: hlfir.assign %[[V_21]] to %[[V_5]]#0 : f16, !fir.ref + x2 = ieee_real(x2,4) ! exact +! print*, x2, ' -> ', x2 + + ! CHECK: hlfir.assign %cst{{[_0-9]*}} to %[[V_9]]#0 : f64, !fir.ref + x8 = -0. + + ! CHECK: %[[V_22:[0-9]+]] = fir.load %[[V_9]]#0 : !fir.ref + ! CHECK: %[[V_23:[0-9]+]] = fir.convert %[[V_22]] : (f64) -> f32 + ! CHECK: %[[V_24:[0-9]+]] = fir.convert %[[V_23]] : (f32) -> f64 + ! CHECK: %[[V_25:[0-9]+]] = arith.cmpf ueq, %[[V_22]], %[[V_24]] fastmath : f64 + ! CHECK: %[[V_26:[0-9]+]] = fir.if %[[V_25]] -> (f32) { + ! CHECK: %[[V_27:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_23]]) <{bit = 1 : i32}> : (f32) -> i1 + ! CHECK: %[[V_28:[0-9]+]] = fir.if %[[V_27]] -> (f32) { + ! CHECK: %[[V_29:[0-9]+]] = fir.call @_FortranAMapException(%c1{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_30:[0-9]+]] = fir.call @feraiseexcept(%[[V_29]]) fastmath : (i32) -> i32 + ! CHECK: %[[V_31:[0-9]+]] = fir.address_of(@_FortranAIeeeValueTable_4) : !fir.ref> + ! CHECK: %[[V_32:[0-9]+]] = fir.coordinate_of %[[V_31]], %c2{{.*}} : (!fir.ref>, i8) -> !fir.ref + ! CHECK: %[[V_33:[0-9]+]] = fir.load %[[V_32]] : !fir.ref + ! CHECK: %[[V_34:[0-9]+]] = arith.bitcast %[[V_33]] : i32 to f32 + ! CHECK: fir.result %[[V_34]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_23]] : f32 + ! CHECK: } + ! CHECK: fir.result %[[V_28]] : f32 + ! CHECK: } else { + ! CHECK-DAG: %[[V_27:[0-9]+]] = fir.call @llvm.get.rounding() fastmath : () -> i32 + ! CHECK-DAG: %[[V_28:[0-9]+]] = arith.cmpf olt, %[[V_22]], %cst{{[_0-9]*}} fastmath : f64 + ! CHECK-DAG: %[[V_29:[0-9]+]] = arith.cmpf ogt, %[[V_22]], %cst{{[_0-9]*}} fastmath : f64 + ! CHECK-DAG: %[[V_30:[0-9]+]] = arith.bitcast %[[V_23]] : f32 to i32 + ! CHECK-DAG: %[[V_31:[0-9]+]] = arith.andi %[[V_30]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_32:[0-9]+]] = fir.convert %[[V_31]] : (i32) -> i1 + ! CHECK-DAG: %[[V_33:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c5{{.*}} : i32 + ! CHECK-DAG: %[[V_34:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_35:[0-9]+]] = arith.ori %[[V_34]], %[[V_33]] : i1 + ! CHECK-DAG: %[[V_36:[0-9]+]] = arith.andi %[[V_35]], %[[V_32]] : i1 + ! CHECK-DAG: %[[V_37:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c0{{.*}} : i32 + ! CHECK-DAG: %[[V_38:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c4{{.*}} : i32 + ! CHECK-DAG: %[[V_39:[0-9]+]] = arith.cmpf olt, %[[V_22]], %[[V_24]] fastmath : f64 + ! CHECK-DAG: %[[V_40:[0-9]+]] = arith.addi %[[V_30]], %c1{{.*}} : i32 + ! CHECK-DAG: %[[V_41:[0-9]+]] = arith.subi %[[V_30]], %c1{{.*}} : i32 + ! CHECK: %[[V_42:[0-9]+]] = fir.if %[[V_39]] -> (f32) { + ! CHECK-DAG: %[[V_44:[0-9]+]] = arith.andi %[[V_37]], %[[V_29]] : i1 + ! CHECK-DAG: %[[V_45:[0-9]+]] = arith.andi %[[V_38]], %[[V_28]] : i1 + ! CHECK-DAG: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c3{{.*}} : i32 + ! CHECK-DAG: %[[V_47:[0-9]+]] = arith.ori %[[V_36]], %[[V_44]] : i1 + ! CHECK-DAG: %[[V_48:[0-9]+]] = arith.ori %[[V_47]], %[[V_45]] : i1 + ! CHECK-DAG: %[[V_49:[0-9]+]] = arith.ori %[[V_48]], %[[V_46]] : i1 + ! CHECK: %[[V_50:[0-9]+]] = fir.if %[[V_49]] -> (f32) { + ! CHECK: %[[V_51:[0-9]+]] = arith.select %[[V_28]], %[[V_40]], %[[V_41]] : i32 + ! CHECK: %[[V_52:[0-9]+]] = arith.bitcast %[[V_51]] : i32 to f32 + ! CHECK: fir.result %[[V_52]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_23]] : f32 + ! CHECK: } + ! CHECK: fir.result %[[V_50]] : f32 + ! CHECK: } else { + ! CHECK-DAG: %[[V_44:[0-9]+]] = arith.andi %[[V_37]], %[[V_28]] : i1 + ! CHECK-DAG: %[[V_45:[0-9]+]] = arith.andi %[[V_38]], %[[V_29]] : i1 + ! CHECK-DAG: %[[V_46:[0-9]+]] = arith.cmpi eq, %[[V_27]], %c2{{.*}} : i32 + ! CHECK-DAG: %[[V_47:[0-9]+]] = arith.ori %[[V_36]], %[[V_44]] : i1 + ! CHECK-DAG: %[[V_48:[0-9]+]] = arith.ori %[[V_47]], %[[V_45]] : i1 + ! CHECK-DAG: %[[V_49:[0-9]+]] = arith.ori %[[V_48]], %[[V_46]] : i1 + ! CHECK: %[[V_50:[0-9]+]] = fir.if %[[V_49]] -> (f32) { + ! CHECK: %[[V_51:[0-9]+]] = arith.select %[[V_29]], %[[V_40]], %[[V_41]] : i32 + ! CHECK: %[[V_52:[0-9]+]] = arith.bitcast %[[V_51]] : i32 to f32 + ! CHECK: fir.result %[[V_52]] : f32 + ! CHECK: } else { + ! CHECK: fir.result %[[V_23]] : f32 + ! CHECK: } + ! CHECK: fir.result %[[V_50]] : f32 + ! CHECK: } + ! CHECK: %[[V_43:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_42]]) <{bit = 516 : i32}> : (f32) -> i1 + ! CHECK: fir.if %[[V_43]] { + ! CHECK: %[[V_44:[0-9]+]] = fir.call @_FortranAMapException(%c40{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_45:[0-9]+]] = fir.call @feraiseexcept(%[[V_44]]) fastmath : (i32) -> i32 + ! CHECK: } else { + ! CHECK: %[[V_44:[0-9]+]] = "llvm.intr.is.fpclass"(%[[V_42]]) <{bit = 240 : i32}> : (f32) -> i1 + ! CHECK: fir.if %[[V_44]] { + ! CHECK: %[[V_45:[0-9]+]] = fir.call @_FortranAMapException(%c48{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_46:[0-9]+]] = fir.call @feraiseexcept(%[[V_45]]) fastmath : (i32) -> i32 + ! CHECK: } else { + ! CHECK: %[[V_45:[0-9]+]] = fir.call @_FortranAMapException(%c32{{.*}}) fastmath : (i32) -> i32 + ! CHECK: %[[V_46:[0-9]+]] = fir.call @feraiseexcept(%[[V_45]]) fastmath : (i32) -> i32 + ! CHECK: } + ! CHECK: } + ! CHECK: fir.result %[[V_42]] : f32 + ! CHECK: } + ! CHECK: hlfir.assign %[[V_26]] to %[[V_7]]#0 : f32, !fir.ref + x4 = ieee_real(x8,4) +! print*, x8, ' -> ', x4 +end diff --git a/flang/test/Lower/OpenMP/Todo/depobj-construct.f90 b/flang/test/Lower/OpenMP/Todo/depobj-construct.f90 new file mode 100644 index 00000000000000..2b3c4d92c4a4f8 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/depobj-construct.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: OpenMPDepobjConstruct +subroutine f00() + integer :: obj + integer :: x + !$omp depobj(obj) depend(in: x) +end diff --git a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 index 9c97c689dad709..b3a668018df1d5 100644 --- a/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 +++ b/flang/test/Lower/OpenMP/delayed-privatization-allocatable-firstprivate.f90 @@ -57,5 +57,4 @@ end program compilation_to_obj ! LLVM: @[[GLOB_VAR:[^[:space:]]+]]t = internal global ! LLVM: define internal void @_QQmain..omp_par -! LLVM: %[[GLOB_VAL:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr @[[GLOB_VAR]]t, align 8 -! LLVM-NEXT: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] } %[[GLOB_VAL]], ptr %{{.*}}, align 8 +! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %{{.+}}, ptr @[[GLOB_VAR]]t, i32 48, i1 false) diff --git a/flang/test/Lower/OpenMP/master_taskloop.f90 b/flang/test/Lower/OpenMP/master_taskloop.f90 new file mode 100644 index 00000000000000..26f664b2662dcb --- /dev/null +++ b/flang/test/Lower/OpenMP/master_taskloop.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP master taskloop Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_master_taskloop + integer :: i, j = 1 + !CHECK: not yet implemented: Taskloop construct + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine diff --git a/flang/test/Lower/OpenMP/master_taskloop_simd.f90 b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 new file mode 100644 index 00000000000000..e928afd65244a4 --- /dev/null +++ b/flang/test/Lower/OpenMP/master_taskloop_simd.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP master taskloop simd Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 + !CHECK: not yet implemented: Composite TASKLOOP SIMD + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 new file mode 100644 index 00000000000000..086ed01d16d364 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master-taskloop-simd.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP parallel master taskloop simd Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 + !CHECK: not yet implemented: Composite TASKLOOP SIMD + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 new file mode 100644 index 00000000000000..17ceb9496c8d34 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master-taskloop.f90 @@ -0,0 +1,14 @@ +! This test checks lowering of OpenMP parallel master taskloop Directive. + +! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 + !CHECK: not yet implemented: Taskloop construct + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Lower/OpenMP/parallel-master.f90 b/flang/test/Lower/OpenMP/parallel-master.f90 new file mode 100644 index 00000000000000..8f3ee31b328537 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-master.f90 @@ -0,0 +1,16 @@ +! This test checks lowering of the parallel master combined construct. + +! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s +! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s + +! CHECK-LABEL: func @_QPparallel_master +subroutine parallel_master(x) + integer :: x + !CHECK: omp.parallel { + !CHECK: omp.master { + !$omp parallel master + x = 1 + !$omp end parallel master + !CHECK: } + !CHECK: } +end subroutine parallel_master diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index 262075ec9b25d0..8e6f55abd5671c 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -17,7 +17,7 @@ subroutine proc end subroutine proc !CHECK-LABEL: define void @proc_() -!CHECK: call void +!CHECK: call void (ptr, i32, ptr, ...) !CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) !CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index e23e38ffb4b013..4d70e1ea4c739a 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -603,10 +603,9 @@ program test_alloc ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) -! LLVM-COUNT-2: call void %{{.*}}() +! LLVM-COUNT-2: call void %{{[0-9]*}}() -! LLVM: %[[C1_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[C1_LOAD]], ptr %{{.*}} +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C1:.*]] = load ptr, ptr %[[GEP_TDESC_C1]] ! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -620,8 +619,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %[[TMP:.*]] ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[LOAD_C2:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_C2]], ptr %{{.*}} +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C2:.*]] = load ptr, ptr %[[GEP_TDESC_C2]] ! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -635,9 +633,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %{{.*}}, ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[C3_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[C3_LOAD]], ptr %{{.*}} - +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C3:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 8 ! LLVM: %[[TDESC_C3:.*]] = load ptr, ptr %[[GEP_TDESC_C3]] ! LLVM: %[[ELE_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -658,8 +654,7 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[BOX7]], ptr %{{.*}} ! LLVM: call void %{{.*}}(ptr %{{.*}}) -! LLVM: %[[C4_LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}} -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[C4_LOAD]], ptr %{{.*}} +! LLVM: call void @llvm.memcpy.p0.p0.i32 ! LLVM: %[[GEP_TDESC_C4:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 8 ! LLVM: %[[TDESC_C4:.*]] = load ptr, ptr %[[GEP_TDESC_C4]] ! LLVM: %[[ELE_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 @@ -686,8 +681,7 @@ program test_alloc ! LLVM-LABEL: define void @_QMpolyPtest_deallocate() ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 ptrtoint (ptr getelementptr (%_QMpolyTp1, ptr null, i32 1) to i64), i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]] -! LLVM: %[[LOAD:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[ALLOCA1]] -! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD]], ptr %[[ALLOCA2:[0-9]*]] +! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false) ! LLVM: %{{.*}} = call {} @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) diff --git a/flang/test/Parser/OpenMP/depobj-construct.f90 b/flang/test/Parser/OpenMP/depobj-construct.f90 new file mode 100644 index 00000000000000..7c474071bc1e67 --- /dev/null +++ b/flang/test/Parser/OpenMP/depobj-construct.f90 @@ -0,0 +1,64 @@ +!RUN: %flang_fc1 -fdebug-unparse -fopenmp -fopenmp-version=52 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp -fopenmp-version=52 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine f00 + integer :: x, y + !$omp depobj(x) depend(in: y) +end + +!UNPARSE: SUBROUTINE f00 +!UNPARSE: INTEGER x, y +!UNPARSE: !$OMP DEPOBJ(x) DEPEND(IN:y) +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct +!PARSE-TREE: | Verbatim +!PARSE-TREE: | OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpClause -> Depend -> OmpDependClause -> InOut +!PARSE-TREE: | | OmpTaskDependenceType -> Type = In +!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'y' + +subroutine f01 + integer :: x + !$omp depobj(x) update(out) +end + +!UNPARSE: SUBROUTINE f01 +!UNPARSE: INTEGER x +!UNPARSE: !$OMP DEPOBJ(x) UPDATE(OUT) +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct +!PARSE-TREE: | Verbatim +!PARSE-TREE: | OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpClause -> Update -> OmpUpdateClause -> OmpTaskDependenceType -> Type = Out + +subroutine f02 + integer :: x + !$omp depobj(x) destroy(x) +end + +!UNPARSE: SUBROUTINE f02 +!UNPARSE: INTEGER x +!UNPARSE: !$OMP DEPOBJ(x) DESTROY(x) +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct +!PARSE-TREE: | Verbatim +!PARSE-TREE: | OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpClause -> Destroy -> OmpDestroyClause -> OmpObject -> Designator -> DataRef -> Name = 'x' + +subroutine f03 + integer :: x + !$omp depobj(x) destroy +end + +!UNPARSE: SUBROUTINE f03 +!UNPARSE: INTEGER x +!UNPARSE: !$OMP DEPOBJ(x) DESTROY +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct +!PARSE-TREE: | Verbatim +!PARSE-TREE: | OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpClause -> Destroy -> diff --git a/flang/test/Parser/OpenMP/master-unparse.f90 b/flang/test/Parser/OpenMP/master-unparse.f90 new file mode 100644 index 00000000000000..30c293a521b5d1 --- /dev/null +++ b/flang/test/Parser/OpenMP/master-unparse.f90 @@ -0,0 +1,73 @@ +! RUN: %flang_fc1 -fdebug-unparse -fopenmp %s | FileCheck --ignore-case %s +! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s + +! Check for parsing of master directive + + +subroutine test_master() + integer :: c = 1 + !PARSE-TREE: OmpBeginBlockDirective + !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = master + !CHECK: !$omp master + !$omp master + c = c + 1 + !$omp end master +end subroutine + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop simd + !CHECK: !$omp master taskloop simd + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine + +subroutine test_master_taskloop + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = master taskloop + !CHECK: !$omp master taskloop + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine + +subroutine test_parallel_master + integer :: c = 2 + !PARSE-TREE: OmpBeginBlockDirective + !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel master + !CHECK: !$omp parallel master + !$omp parallel master + c = c + 2 + !$omp end parallel master +end subroutine + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop simd + !CHECK: !$omp parallel master taskloop simd + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 + !PARSE-TREE: OmpBeginLoopDirective + !PARSE-TREE-NEXT: OmpLoopDirective -> llvm::omp::Directive = parallel master taskloop + !CHECK: !$omp parallel master taskloop + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index 1a7a57b124e9bd..124f1a02d99fba 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -476,14 +476,14 @@ ! 2.13.1 master !$omp parallel - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master a=3.14 !$omp end master !$omp end parallel !$omp parallel - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: NUM_THREADS clause is not allowed on the MASTER directive !$omp master num_threads(4) a=3.14 diff --git a/flang/test/Semantics/OpenMP/depobj-construct-v50.f90 b/flang/test/Semantics/OpenMP/depobj-construct-v50.f90 new file mode 100644 index 00000000000000..e7fa24d521b63b --- /dev/null +++ b/flang/test/Semantics/OpenMP/depobj-construct-v50.f90 @@ -0,0 +1,28 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 + +subroutine f00 + integer :: obj +!ERROR: A DEPEND clause on a DEPOBJ construct must not have SOURCE, SINK or DEPOBJ as dependence-type + !$omp depobj(obj) depend(source) +end + +subroutine f01 + integer :: obj + integer :: x, y +!ERROR: A DEPEND clause on a DEPOBJ construct must only specify one locator + !$omp depobj(obj) depend(in: x, y) +end + +subroutine f02 + integer :: obj + integer :: x(10) +!WARNING: An iterator-modifier may specify multiple locators, a DEPEND clause on a DEPOBJ construct must only specify one locator + !$omp depobj(obj) depend(iterator(i = 1:10), in: x(i)) +end + +subroutine f03 + integer :: obj, jbo +!ERROR: The DESTROY clause must refer to the same object as the DEPOBJ construct +!PORTABILITY: The object parameter in DESTROY clause in DEPOPJ construct was introduced in OpenMP v5.2 + !$omp depobj(obj) destroy(jbo) +end diff --git a/flang/test/Semantics/OpenMP/depobj-construct-v51.f90 b/flang/test/Semantics/OpenMP/depobj-construct-v51.f90 new file mode 100644 index 00000000000000..fa0c025a110100 --- /dev/null +++ b/flang/test/Semantics/OpenMP/depobj-construct-v51.f90 @@ -0,0 +1,13 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51 + +subroutine f04 + integer :: obj +!ERROR: An UPDATE clause on a DEPOBJ construct must not have SOURCE, SINK or DEPOBJ as dependence-type + !$omp depobj(obj) update(source) +end + +subroutine f05 + integer :: obj +!ERROR: An UPDATE clause on a DEPOBJ construct must not have SOURCE, SINK or DEPOBJ as dependence-type + !$omp depobj(obj) update(depobj) +end diff --git a/flang/test/Semantics/OpenMP/depobj-construct-v52.f90 b/flang/test/Semantics/OpenMP/depobj-construct-v52.f90 new file mode 100644 index 00000000000000..f2e66485c6c801 --- /dev/null +++ b/flang/test/Semantics/OpenMP/depobj-construct-v52.f90 @@ -0,0 +1,15 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52 + +subroutine f00 + integer :: obj +!WARNING: The SOURCE task-dependence-type is deprecated in OpenMP v5.2 +!ERROR: A DEPEND clause on a DEPOBJ construct must not have SOURCE or SINK as dependence-type + !$omp depobj(obj) depend(source) +end + +subroutine f03 + integer :: obj, jbo +!Note: no portability message +!ERROR: The DESTROY clause must refer to the same object as the DEPOBJ construct + !$omp depobj(obj) destroy(jbo) +end diff --git a/flang/test/Semantics/OpenMP/deprecation.f90 b/flang/test/Semantics/OpenMP/deprecation.f90 new file mode 100644 index 00000000000000..e04f43026bbce2 --- /dev/null +++ b/flang/test/Semantics/OpenMP/deprecation.f90 @@ -0,0 +1,59 @@ +! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -Werror + +! Check for deprecation of master directive and its combined/composite variants + +subroutine test_master() + integer :: c = 1 +!WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. + !$omp master + c = c + 1 + !$omp end master +end subroutine + +subroutine test_parallel_master + integer :: c = 2 +!WARNING: OpenMP directive PARALLEL MASTER has been deprecated, please use PARALLEL MASKED instead. + !$omp parallel master + c = c + 2 + !$omp end parallel master +end subroutine + +subroutine test_master_taskloop_simd() + integer :: i, j = 1 +!WARNING: OpenMP directive MASTER TASKLOOP SIMD has been deprecated, please use MASKED TASKLOOP SIMD instead. + !$omp master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop simd +end subroutine + +subroutine test_master_taskloop + integer :: i, j = 1 +!WARNING: OpenMP directive MASTER TASKLOOP has been deprecated, please use MASKED TASKLOOP instead. + !$omp master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end master taskloop +end subroutine + +subroutine test_parallel_master_taskloop_simd + integer :: i, j = 1 +!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP SIMD has been deprecated, please use PARALLEL_MASKED TASKLOOP SIMD instead. + !$omp parallel master taskloop simd + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop simd +end subroutine + +subroutine test_parallel_master_taskloop + integer :: i, j = 1 +!WARNING: OpenMP directive PARALLEL MASTER TASKLOOP has been deprecated, please use PARALLEL MASKED TASKLOOP instead. + !$omp parallel master taskloop + do i=1,10 + j = j + 1 + end do + !$omp end parallel master taskloop +end subroutine diff --git a/flang/test/Semantics/OpenMP/flush02.f90 b/flang/test/Semantics/OpenMP/flush02.f90 index f06719f302fd7a..ed0cf6602d574a 100644 --- a/flang/test/Semantics/OpenMP/flush02.f90 +++ b/flang/test/Semantics/OpenMP/flush02.f90 @@ -80,7 +80,7 @@ !$omp parallel num_threads(4) array = (/1, 2, 3, 4, 5, 6, 7, 8, 9, 10/) - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master !$omp flush (array) !$omp end master diff --git a/flang/test/Semantics/OpenMP/nested-barrier.f90 b/flang/test/Semantics/OpenMP/nested-barrier.f90 index aae283229e330d..7c635d8e23cc0d 100644 --- a/flang/test/Semantics/OpenMP/nested-barrier.f90 +++ b/flang/test/Semantics/OpenMP/nested-barrier.f90 @@ -75,7 +75,7 @@ program omp_nest_barrier end do !$omp end critical - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master do i = 1, 10 k = k + 1 @@ -108,7 +108,7 @@ program omp_nest_barrier end do !$omp end ordered - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master do i = 1, 10 !ERROR: `DISTRIBUTE` region has to be strictly nested inside `TEAMS` region. diff --git a/flang/test/Semantics/OpenMP/nested-master.f90 b/flang/test/Semantics/OpenMP/nested-master.f90 index 069de67cafae28..b21ca5d1415931 100644 --- a/flang/test/Semantics/OpenMP/nested-master.f90 +++ b/flang/test/Semantics/OpenMP/nested-master.f90 @@ -9,7 +9,7 @@ program omp_nest_master !$omp do do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -17,7 +17,7 @@ program omp_nest_master end do !$omp sections - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master do i = 1, 10 @@ -27,7 +27,7 @@ program omp_nest_master !$omp end sections !$omp single - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master do i = 1, 10 @@ -41,7 +41,7 @@ program omp_nest_master !$omp task do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -52,7 +52,7 @@ program omp_nest_master !$omp taskloop do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -63,7 +63,7 @@ program omp_nest_master !$omp target parallel do simd do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: The only OpenMP constructs that can be encountered during execution of a 'SIMD' region are the `ATOMIC` construct, the `LOOP` construct, the `SIMD` construct and the `ORDERED` construct with the `SIMD` clause. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master @@ -75,7 +75,7 @@ program omp_nest_master !$omp critical do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -85,7 +85,7 @@ program omp_nest_master !$omp ordered do i = 1, 10 k = k + 1 - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -99,7 +99,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -116,7 +116,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master j = j -1 !$omp end master @@ -133,7 +133,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 @@ -151,7 +151,7 @@ program omp_nest_master !$omp distribute do k =1, 10 print *, "hello" - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$omp master j = j -1 diff --git a/flang/test/Semantics/OpenMP/nested-teams.f90 b/flang/test/Semantics/OpenMP/nested-teams.f90 index f3b96b0ab43903..06eea12aba5595 100644 --- a/flang/test/Semantics/OpenMP/nested-teams.f90 +++ b/flang/test/Semantics/OpenMP/nested-teams.f90 @@ -42,7 +42,7 @@ program main !$omp end teams end do - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$omp master !ERROR: TEAMS region can only be strictly nested within the implicit parallel region or TARGET region !$omp teams diff --git a/flang/test/Semantics/OpenMP/ordered-simd.f90 b/flang/test/Semantics/OpenMP/ordered-simd.f90 index ed52b759491002..716dc42c28bb64 100644 --- a/flang/test/Semantics/OpenMP/ordered-simd.f90 +++ b/flang/test/Semantics/OpenMP/ordered-simd.f90 @@ -95,7 +95,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP CRITICAL C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -108,7 +108,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP ORDERED C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !$OMP MASTER DO I = 1,N !ERROR: `ORDERED` region may not be closely nested inside of `CRITICAL`, `ORDERED`, explicit `TASK` or `TASKLOOP` region. @@ -121,7 +121,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASK C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$OMP MASTER DO I = 1,N @@ -136,7 +136,7 @@ SUBROUTINE ORDERED_BAD(N) !$OMP TASKLOOP DO J= 1,N C = C - A * B - !WARNING: OpenMP directive 'master' has been deprecated, please use 'masked' instead. + !WARNING: OpenMP directive MASTER has been deprecated, please use MASKED instead. !ERROR: `MASTER` region may not be closely nested inside of `WORKSHARING`, `LOOP`, `TASK`, `TASKLOOP`, or `ATOMIC` region. !$OMP MASTER DO I = 1,N diff --git a/flang/test/Transforms/debug-tuple-type.fir b/flang/test/Transforms/debug-tuple-type.fir new file mode 100644 index 00000000000000..c9b0d16c06e1ae --- /dev/null +++ b/flang/test/Transforms/debug-tuple-type.fir @@ -0,0 +1,15 @@ +// RUN: fir-opt --add-debug-info --mlir-print-debuginfo %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<>} { + func.func private @fn1(!fir.ref>) + func.func private @_FortranAioOutputDerivedType(!fir.ref>) +} + +// CHECK: #[[F64:.*]] = #llvm.di_basic_type +// CHECK: #[[CU:.*]] = #llvm.di_compile_unit<{{.*}}> +// CHECK: #[[DTY1:.*]] = #llvm.di_derived_type +// CHECK: #[[DTY2:.*]] = #llvm.di_derived_type +// CHECK: #[[COM_TY1:.*]] = #llvm.di_composite_type +// CHECK: #[[COM_TY2:.*]] = #llvm.di_composite_type +// CHECK: #llvm.di_subroutine_type +// CHECK: #llvm.di_subroutine_type diff --git a/flang/tools/CMakeLists.txt b/flang/tools/CMakeLists.txt index 337545ae0d4d7a..1d2d2c608faf95 100644 --- a/flang/tools/CMakeLists.txt +++ b/flang/tools/CMakeLists.txt @@ -12,3 +12,4 @@ add_subdirectory(flang-driver) add_subdirectory(tco) add_subdirectory(f18-parse-demo) add_subdirectory(fir-opt) +add_subdirectory(fir-lsp-server) diff --git a/flang/tools/fir-lsp-server/CMakeLists.txt b/flang/tools/fir-lsp-server/CMakeLists.txt new file mode 100644 index 00000000000000..ff0ced6693b97f --- /dev/null +++ b/flang/tools/fir-lsp-server/CMakeLists.txt @@ -0,0 +1,17 @@ +set(LLVM_LINK_COMPONENTS + Core + Support + AsmParser + ) + +add_flang_tool(fir-lsp-server fir-lsp-server.cpp) + +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) +get_property(extension_libs GLOBAL PROPERTY MLIR_EXTENSION_LIBS) +target_link_libraries(fir-lsp-server PRIVATE + CUFDialect + FIRDialect + HLFIRDialect + MLIRLspServerLib + ${dialect_libs} + ${extension_libs}) diff --git a/flang/tools/fir-lsp-server/fir-lsp-server.cpp b/flang/tools/fir-lsp-server/fir-lsp-server.cpp new file mode 100644 index 00000000000000..8b724e292b5abb --- /dev/null +++ b/flang/tools/fir-lsp-server/fir-lsp-server.cpp @@ -0,0 +1,9 @@ +#include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h" +#include "flang/Optimizer/Support/InitFIR.h" + +int main(int argc, char **argv) { + mlir::DialectRegistry registry; + fir::support::registerNonCodegenDialects(registry); + fir::support::addFIRExtensions(registry); + return mlir::failed(mlir::MlirLspServerMain(argc, argv, registry)); +} diff --git a/libc/include/llvm-libc-macros/linux/signal-macros.h b/libc/include/llvm-libc-macros/linux/signal-macros.h index e379fc41efd02f..0b7317ebc9b80a 100644 --- a/libc/include/llvm-libc-macros/linux/signal-macros.h +++ b/libc/include/llvm-libc-macros/linux/signal-macros.h @@ -76,15 +76,12 @@ #define SS_ONSTACK 0x1 #define SS_DISABLE 0x2 -#ifdef __x86_64__ +#if defined(__x86_64__) || defined(__i386__) || defined(__riscv) #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 #elif defined(__aarch64__) #define MINSIGSTKSZ 5120 #define SIGSTKSZ 16384 -#elif defined(__riscv) -#define MINSIGSTKSZ 2048 -#define SIGSTKSZ 8192 #else #error "Signal stack sizes not defined for your platform." #endif diff --git a/libc/include/llvm-libc-types/fexcept_t.h b/libc/include/llvm-libc-types/fexcept_t.h index 60687bd1318aa4..5aa09fbbaffc77 100644 --- a/libc/include/llvm-libc-types/fexcept_t.h +++ b/libc/include/llvm-libc-types/fexcept_t.h @@ -9,6 +9,10 @@ #ifndef LLVM_LIBC_TYPES_FEXCEPT_T_H #define LLVM_LIBC_TYPES_FEXCEPT_T_H -typedef int fexcept_t; +#if defined(__x86_64__) || defined(__i386__) +typedef unsigned short int fexcept_t; +#else +typedef unsigned int fexcept_t; +#endif #endif // LLVM_LIBC_TYPES_FEXCEPT_T_H diff --git a/libc/include/llvm-libc-types/jmp_buf.h b/libc/include/llvm-libc-types/jmp_buf.h index 60e033c6c65a95..f246e6491cf554 100644 --- a/libc/include/llvm-libc-types/jmp_buf.h +++ b/libc/include/llvm-libc-types/jmp_buf.h @@ -19,6 +19,13 @@ typedef struct { __UINT64_TYPE__ r15; __UINTPTR_TYPE__ rsp; __UINTPTR_TYPE__ rip; +#elif defined(__i386__) + long ebx; + long esi; + long edi; + long ebp; + long esp; + long eip; #elif defined(__riscv) /* Program counter. */ long int __pc; diff --git a/libc/src/setjmp/x86_64/longjmp.cpp b/libc/src/setjmp/x86_64/longjmp.cpp index c293c55a6f9fb2..143c9deb11e9aa 100644 --- a/libc/src/setjmp/x86_64/longjmp.cpp +++ b/libc/src/setjmp/x86_64/longjmp.cpp @@ -11,12 +11,34 @@ #include "src/__support/common.h" #include "src/__support/macros/config.h" -#if !defined(LIBC_TARGET_ARCH_IS_X86_64) +#if !defined(LIBC_TARGET_ARCH_IS_X86) #error "Invalid file include" #endif namespace LIBC_NAMESPACE_DECL { +#ifdef __i386__ +[[gnu::naked]] +LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { + asm(R"( + mov 0x4(%%esp), %%ecx + mov 0x8(%%esp), %%eax + cmpl $0x1, %%eax + adcl $0x0, %%eax + + mov %c[ebx](%%ecx), %%ebx + mov %c[esi](%%ecx), %%esi + mov %c[edi](%%ecx), %%edi + mov %c[ebp](%%ecx), %%ebp + mov %c[esp](%%ecx), %%esp + + jmp *%c[eip](%%ecx) + )" ::[ebx] "i"(offsetof(__jmp_buf, ebx)), + [esi] "i"(offsetof(__jmp_buf, esi)), [edi] "i"(offsetof(__jmp_buf, edi)), + [ebp] "i"(offsetof(__jmp_buf, ebp)), [esp] "i"(offsetof(__jmp_buf, esp)), + [eip] "i"(offsetof(__jmp_buf, eip))); +} +#else [[gnu::naked]] LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { asm(R"( @@ -38,5 +60,6 @@ LLVM_LIBC_FUNCTION(void, longjmp, (jmp_buf, int)) { [r15] "i"(offsetof(__jmp_buf, r15)), [rsp] "i"(offsetof(__jmp_buf, rsp)), [rip] "i"(offsetof(__jmp_buf, rip))); } +#endif } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/setjmp/x86_64/setjmp.cpp b/libc/src/setjmp/x86_64/setjmp.cpp index f6e82642edd7da..5ac10fa87b39a3 100644 --- a/libc/src/setjmp/x86_64/setjmp.cpp +++ b/libc/src/setjmp/x86_64/setjmp.cpp @@ -11,12 +11,37 @@ #include "src/__support/macros/config.h" #include "src/setjmp/setjmp_impl.h" -#if !defined(LIBC_TARGET_ARCH_IS_X86_64) +#if !defined(LIBC_TARGET_ARCH_IS_X86) #error "Invalid file include" #endif namespace LIBC_NAMESPACE_DECL { +#ifdef __i386__ +[[gnu::naked]] +LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { + asm(R"( + mov 4(%%esp), %%eax + + mov %%ebx, %c[ebx](%%eax) + mov %%esi, %c[esi](%%eax) + mov %%edi, %c[edi](%%eax) + mov %%ebp, %c[ebp](%%eax) + + lea 4(%%esp), %%ecx + mov %%ecx, %c[esp](%%eax) + + mov (%%esp), %%ecx + mov %%ecx, %c[eip](%%eax) + + xorl %%eax, %%eax + retl)" ::[ebx] "i"(offsetof(__jmp_buf, ebx)), + [esi] "i"(offsetof(__jmp_buf, esi)), [edi] "i"(offsetof(__jmp_buf, edi)), + [ebp] "i"(offsetof(__jmp_buf, ebp)), [esp] "i"(offsetof(__jmp_buf, esp)), + [eip] "i"(offsetof(__jmp_buf, eip)) + : "eax", "ecx"); +} +#else [[gnu::naked]] LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { asm(R"( @@ -41,5 +66,6 @@ LLVM_LIBC_FUNCTION(int, setjmp, (jmp_buf buf)) { [rip] "i"(offsetof(__jmp_buf, rip)) : "rax"); } +#endif } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 78381e46e480dd..22a1876da5369c 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -221,7 +221,7 @@ LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src, return len; size_t n = len < size - 1 ? len : size - 1; inline_memcpy(dst, src, n); - inline_bzero(dst + n, size - n); + dst[n] = '\0'; return len; } @@ -239,11 +239,13 @@ LIBC_INLINE constexpr static char *strrchr_implementation(const char *src, int c) { char ch = static_cast(c); char *last_occurrence = nullptr; - for (; *src; ++src) { + while (true) { if (*src == ch) last_occurrence = const_cast(src); + if (!*src) + return last_occurrence; + ++src; } - return last_occurrence; } } // namespace internal diff --git a/libc/test/UnitTest/LibcTest.h b/libc/test/UnitTest/LibcTest.h index 2b972004e9eeaa..1707c3c0fdcfad 100644 --- a/libc/test/UnitTest/LibcTest.h +++ b/libc/test/UnitTest/LibcTest.h @@ -162,6 +162,14 @@ class Test { (unsigned long long)RHS, LHSStr, RHSStr, Loc); } + // Helper to allow macro invocations like `ASSERT_EQ(foo, nullptr)`. + template , ValType> = nullptr> + bool test(TestCond Cond, ValType LHS, std::nullptr_t, const char *LHSStr, + const char *RHSStr, internal::Location Loc) { + return test(Cond, LHS, static_cast(nullptr), LHSStr, RHSStr, Loc); + } + template < typename ValType, cpp::enable_if_t< diff --git a/libc/test/src/math/exhaustive/sinpif_test.cpp b/libc/test/src/math/exhaustive/sinpif_test.cpp index 8bc1d81eb7e3d2..81abac0b73f27a 100644 --- a/libc/test/src/math/exhaustive/sinpif_test.cpp +++ b/libc/test/src/math/exhaustive/sinpif_test.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "exhaustive_test.h" -#include "mpfr.h" #include "src/math/sinpif.h" #include "utils/MPFRWrapper/MPFRUtils.h" -#include namespace mpfr = LIBC_NAMESPACE::testing::mpfr; diff --git a/libc/test/src/string/StrchrTest.h b/libc/test/src/string/StrchrTest.h index 74e172de95953e..8c3fe5293008a1 100644 --- a/libc/test/src/string/StrchrTest.h +++ b/libc/test/src/string/StrchrTest.h @@ -40,14 +40,16 @@ template struct StrchrTest : public LIBC_NAMESPACE::testing::Test { const char *src = "abcde"; // Should return null terminator. - ASSERT_STREQ(Func(src, '\0'), ""); + const char *nul_terminator = Func(src, '\0'); + ASSERT_NE(nul_terminator, nullptr); + ASSERT_STREQ(nul_terminator, ""); // Source string should not change. ASSERT_STREQ(src, "abcde"); } void characterNotWithinStringShouldReturnNullptr() { // Since 'z' is not within the string, should return nullptr. - ASSERT_STREQ(Func("123?", 'z'), nullptr); + ASSERT_EQ(Func("123?", 'z'), nullptr); } void theSourceShouldNotChange() { @@ -74,11 +76,13 @@ template struct StrchrTest : public LIBC_NAMESPACE::testing::Test { void emptyStringShouldOnlyMatchNullTerminator() { // Null terminator should match. - ASSERT_STREQ(Func("", '\0'), ""); + const char empty_string[] = ""; + ASSERT_EQ(static_cast(Func(empty_string, '\0')), + empty_string); // All other characters should not match. - ASSERT_STREQ(Func("", 'Z'), nullptr); - ASSERT_STREQ(Func("", '3'), nullptr); - ASSERT_STREQ(Func("", '*'), nullptr); + ASSERT_EQ(Func("", 'Z'), nullptr); + ASSERT_EQ(Func("", '3'), nullptr); + ASSERT_EQ(Func("", '*'), nullptr); } }; @@ -114,7 +118,9 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { const char *src = "abcde"; // Should return null terminator. - ASSERT_STREQ(Func(src, '\0'), ""); + const char *nul_terminator = Func(src, '\0'); + ASSERT_NE(nul_terminator, nullptr); + ASSERT_STREQ(nul_terminator, ""); // Source string should not change. ASSERT_STREQ(src, "abcde"); } @@ -122,9 +128,9 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void findsLastBehindFirstNullTerminator() { static const char src[6] = {'a', 'a', '\0', 'b', '\0', 'c'}; // 'b' is behind a null terminator, so should not be found. - ASSERT_STREQ(Func(src, 'b'), nullptr); + ASSERT_EQ(Func(src, 'b'), nullptr); // Same goes for 'c'. - ASSERT_STREQ(Func(src, 'c'), nullptr); + ASSERT_EQ(Func(src, 'c'), nullptr); // Should find the second of the two a's. ASSERT_STREQ(Func(src, 'a'), "a"); @@ -132,7 +138,7 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void characterNotWithinStringShouldReturnNullptr() { // Since 'z' is not within the string, should return nullptr. - ASSERT_STREQ(Func("123?", 'z'), nullptr); + ASSERT_EQ(Func("123?", 'z'), nullptr); } void shouldFindLastOfDuplicates() { @@ -146,11 +152,13 @@ template struct StrrchrTest : public LIBC_NAMESPACE::testing::Test { void emptyStringShouldOnlyMatchNullTerminator() { // Null terminator should match. - ASSERT_STREQ(Func("", '\0'), ""); + const char empty_string[] = ""; + ASSERT_EQ(static_cast(Func(empty_string, '\0')), + empty_string); // All other characters should not match. - ASSERT_STREQ(Func("", 'A'), nullptr); - ASSERT_STREQ(Func("", '2'), nullptr); - ASSERT_STREQ(Func("", '*'), nullptr); + ASSERT_EQ(Func("", 'A'), nullptr); + ASSERT_EQ(Func("", '2'), nullptr); + ASSERT_EQ(Func("", '*'), nullptr); } }; diff --git a/libc/test/src/string/strlcat_test.cpp b/libc/test/src/string/strlcat_test.cpp index 1ffa4b0e921e2b..5757fc92b39d2a 100644 --- a/libc/test/src/string/strlcat_test.cpp +++ b/libc/test/src/string/strlcat_test.cpp @@ -27,6 +27,15 @@ TEST(LlvmLibcStrlcatTest, Smaller) { EXPECT_STREQ(buf, "abcd"); } +TEST(LlvmLibcStrlcatTest, SmallerNoOverwriteAfter0) { + const char *str = "cd"; + char buf[8]{"ab\0\0efg"}; + + EXPECT_EQ(LIBC_NAMESPACE::strlcat(buf, str, 8), size_t(4)); + EXPECT_STREQ(buf, "abcd"); + EXPECT_STREQ(buf + 5, "fg"); +} + TEST(LlvmLibcStrlcatTest, No0) { const char *str = "cd"; char buf[7]{"ab"}; diff --git a/libc/test/src/string/strlcpy_test.cpp b/libc/test/src/string/strlcpy_test.cpp index 5a1e30c12963f3..ecf0e925a265c3 100644 --- a/libc/test/src/string/strlcpy_test.cpp +++ b/libc/test/src/string/strlcpy_test.cpp @@ -25,6 +25,5 @@ TEST(LlvmLibcStrlcpyTest, Smaller) { EXPECT_EQ(LIBC_NAMESPACE::strlcpy(buf, str, 7), size_t(3)); EXPECT_STREQ(buf, "abc"); - for (const char *p = buf + 3; p < buf + 7; p++) - EXPECT_EQ(*p, '\0'); + EXPECT_STREQ(buf + 4, "11"); } diff --git a/libc/test/src/sys/statvfs/linux/CMakeLists.txt b/libc/test/src/sys/statvfs/linux/CMakeLists.txt index 1f8688868e0438..fa1e9052d1cac4 100644 --- a/libc/test/src/sys/statvfs/linux/CMakeLists.txt +++ b/libc/test/src/sys/statvfs/linux/CMakeLists.txt @@ -8,8 +8,9 @@ add_libc_unittest( statvfs_test.cpp DEPENDS libc.src.errno.errno - libc.src.sys.statvfs.linux.statfs_utils libc.src.sys.statvfs.statvfs + libc.src.sys.stat.mkdirat + libc.src.sys.stat.rmdir libc.test.UnitTest.ErrnoSetterMatcher ) @@ -21,8 +22,9 @@ add_libc_unittest( fstatvfs_test.cpp DEPENDS libc.src.errno.errno - libc.src.sys.statvfs.linux.statfs_utils libc.src.sys.statvfs.fstatvfs + libc.src.sys.stat.mkdirat + libc.src.sys.stat.rmdir libc.src.fcntl.open libc.src.unistd.close libc.test.UnitTest.ErrnoSetterMatcher diff --git a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp index 2f3e0b96ff0957..efd1e688280b5f 100644 --- a/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/fstatvfs_test.cpp @@ -1,49 +1,56 @@ +//===-- Unittests for fstatvfs --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" #include "src/fcntl/open.h" +#include "src/sys/stat/mkdirat.h" #include "src/sys/statvfs/fstatvfs.h" -#include "src/sys/statvfs/linux/statfs_utils.h" #include "src/unistd/close.h" +#include "src/unistd/rmdir.h" #include "test/UnitTest/ErrnoSetterMatcher.h" -#include "test/UnitTest/LibcTest.h" -#include +#include "test/UnitTest/Test.h" + using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; -#ifdef SYS_statfs64 -using StatFs = statfs64; -#else -using StatFs = statfs; -#endif - -namespace LIBC_NAMESPACE_DECL { -static int fstatfs(int fd, StatFs *buf) { - using namespace statfs_utils; - if (cpp::optional result = linux_fstatfs(fd)) { - *buf = *result; - return 0; - } - return -1; -} -} // namespace LIBC_NAMESPACE_DECL - -struct PathFD { - int fd; - explicit PathFD(const char *path) - : fd(LIBC_NAMESPACE::open(path, O_CLOEXEC | O_PATH)) {} - ~PathFD() { LIBC_NAMESPACE::close(fd); } - operator int() const { return fd; } -}; - -TEST(LlvmLibcSysStatvfsTest, FstatfsBasic) { - StatFs buf; - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/"), &buf), Succeeds()); - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/proc"), &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(PROC_SUPER_MAGIC)); - ASSERT_THAT(LIBC_NAMESPACE::fstatfs(PathFD("/sys"), &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(SYSFS_MAGIC)); +TEST(LlvmLibcSysFStatvfsTest, FStatvfsBasic) { + struct statvfs buf; + + int fd = LIBC_NAMESPACE::open("/", O_PATH); + ASSERT_ERRNO_SUCCESS(); + ASSERT_GT(fd, 0); + + // The root of the file directory must always exist + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Succeeds()); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); } -TEST(LlvmLibcSysStatvfsTest, FstatvfsInvalidFD) { +TEST(LlvmLibcSysFStatvfsTest, FStatvfsInvalidPath) { struct statvfs buf; - ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(-1, &buf), Fails(EBADF)); + + constexpr const char *FILENAME = "testdata/statvfs.testdir"; + auto TEST_DIR = libc_make_test_file_path(FILENAME); + + ASSERT_THAT(LIBC_NAMESPACE::mkdirat(AT_FDCWD, TEST_DIR, S_IRWXU), + Succeeds(0)); + + int fd = LIBC_NAMESPACE::open(TEST_DIR, O_PATH); + ASSERT_ERRNO_SUCCESS(); + ASSERT_GT(fd, 0); + + // create the file, assert it exists, then delete it and assert it doesn't + // exist anymore. + + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Succeeds()); + + ASSERT_THAT(LIBC_NAMESPACE::rmdir(TEST_DIR), Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Fails(ENOENT)); + ASSERT_THAT(LIBC_NAMESPACE::close(fd), Succeeds(0)); + ASSERT_THAT(LIBC_NAMESPACE::fstatvfs(fd, &buf), Fails(ENOENT)); } diff --git a/libc/test/src/sys/statvfs/linux/statvfs_test.cpp b/libc/test/src/sys/statvfs/linux/statvfs_test.cpp index 5329adb54d64d0..0b154e7aa3fb7c 100644 --- a/libc/test/src/sys/statvfs/linux/statvfs_test.cpp +++ b/libc/test/src/sys/statvfs/linux/statvfs_test.cpp @@ -1,54 +1,43 @@ +//===-- Unittests for statvfs ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/fcntl_macros.h" #include "src/__support/macros/config.h" -#include "src/sys/statvfs/linux/statfs_utils.h" +#include "src/sys/stat/mkdirat.h" #include "src/sys/statvfs/statvfs.h" +#include "src/unistd/rmdir.h" #include "test/UnitTest/ErrnoSetterMatcher.h" -#include "test/UnitTest/LibcTest.h" -#include +#include "test/UnitTest/Test.h" + using namespace LIBC_NAMESPACE::testing::ErrnoSetterMatcher; -#ifdef SYS_statfs64 -using StatFs = statfs64; -#else -using StatFs = statfs; -#endif - -namespace LIBC_NAMESPACE_DECL { -static int statfs(const char *path, StatFs *buf) { - using namespace statfs_utils; - if (cpp::optional result = linux_statfs(path)) { - *buf = *result; - return 0; - } - return -1; -} -} // namespace LIBC_NAMESPACE_DECL - -TEST(LlvmLibcSysStatfsTest, StatfsBasic) { - StatFs buf; - ASSERT_THAT(LIBC_NAMESPACE::statfs("/", &buf), Succeeds()); - ASSERT_THAT(LIBC_NAMESPACE::statfs("/proc", &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(PROC_SUPER_MAGIC)); - ASSERT_THAT(LIBC_NAMESPACE::statfs("/sys", &buf), Succeeds()); - ASSERT_EQ(buf.f_type, static_cast(SYSFS_MAGIC)); +TEST(LlvmLibcSysStatvfsTest, StatvfsBasic) { + struct statvfs buf; + // The root of the file directory must always exist + ASSERT_THAT(LIBC_NAMESPACE::statvfs("/", &buf), Succeeds()); } -TEST(LlvmLibcSysStatfsTest, StatvfsInvalidPath) { +TEST(LlvmLibcSysStatvfsTest, StatvfsInvalidPath) { struct statvfs buf; + ASSERT_THAT(LIBC_NAMESPACE::statvfs("", &buf), Fails(ENOENT)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/nonexistent", &buf), Fails(ENOENT)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/dev/null/whatever", &buf), - Fails(ENOTDIR)); - ASSERT_THAT(LIBC_NAMESPACE::statvfs(nullptr, &buf), Fails(EFAULT)); -} -TEST(LlvmLibcSysStatfsTest, StatvfsNameTooLong) { - struct statvfs buf; - ASSERT_THAT(LIBC_NAMESPACE::statvfs("/", &buf), Succeeds()); - char *name = static_cast(__builtin_alloca(buf.f_namemax + 3)); - name[0] = '/'; - name[buf.f_namemax + 2] = '\0'; - for (unsigned i = 1; i < buf.f_namemax + 2; ++i) { - name[i] = 'a'; - } - ASSERT_THAT(LIBC_NAMESPACE::statvfs(name, &buf), Fails(ENAMETOOLONG)); + // create the file, assert it exists, then delete it and assert it doesn't + // exist anymore. + constexpr const char *FILENAME = "testdata/statvfs.testdir"; + auto TEST_DIR = libc_make_test_file_path(FILENAME); + + ASSERT_THAT(LIBC_NAMESPACE::mkdirat(AT_FDCWD, TEST_DIR, S_IRWXU), + Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::statvfs(TEST_DIR, &buf), Succeeds()); + + ASSERT_THAT(LIBC_NAMESPACE::rmdir(TEST_DIR), Succeeds(0)); + + ASSERT_THAT(LIBC_NAMESPACE::statvfs(TEST_DIR, &buf), Fails(ENOENT)); } diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst index 38b8df3b2a7718..bf3aafe6139ee9 100644 --- a/libcxx/docs/ReleaseNotes/20.rst +++ b/libcxx/docs/ReleaseNotes/20.rst @@ -38,6 +38,7 @@ What's New in Libc++ 20.0.0? Implemented Papers ------------------ +- P0619R4: Reviewing Deprecated Facilities of C++17 for C++20 (`Github `__) - P2747R2: ``constexpr`` placement new (`Github `__) - P2609R3: Relaxing Ranges Just A Smidge (`Github `__) - P2985R0: A type trait for detecting virtual base classes (`Github `__) @@ -89,6 +90,11 @@ Deprecations and Removals the ``_LIBCPP_VERBOSE_ABORT_NOT_NOEXCEPT`` macro can be defined to make the function non-``noexcept``. That macro will be removed in LLVM 21. +- ````, ```` (previously missing), ````, and ```` are deprecated since C++17 as + specified by the standard. They, together with ````, are removed in C++20, but libc++ still provides these + headers as an extension and only deprecates them. The ``_LIBCPP_DISABLE_DEPRECATION_WARNINGS`` macro can be defined to + suppress deprecation for these headers. + Upcoming Deprecations and Removals ---------------------------------- diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv index 9a057be8ad0519..5cd77be4d58def 100644 --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -34,7 +34,7 @@ "`P0528R3 `__","The Curious Case of Padding Bits, Featuring Atomic Compare-and-Exchange","2018-06 (Rapperswil)","","","" "`P0542R5 `__","Support for contract based programming in C++","2018-06 (Rapperswil)","|Nothing To Do|","n/a","Pulled at the 2019-07 meeting in Cologne" "`P0556R3 `__","Integral power-of-2 operations","2018-06 (Rapperswil)","|Complete|","9.0","" -"`P0619R4 `__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Partial|","","Only sections D.7, D.8, D.9, D.10, D.11, D.12, and D.13 are implemented. Section D.4 remains undone." +"`P0619R4 `__","Reviewing Deprecated Facilities of C++17 for C++20","2018-06 (Rapperswil)","|Complete|","20.0","Removed headers are still provided as an extension, but with deprecation warnings" "`P0646R1 `__","Improving the Return Value of Erase-Like Algorithms","2018-06 (Rapperswil)","|Complete|","10.0","" "`P0722R3 `__","Efficient sized delete for variable sized classes","2018-06 (Rapperswil)","|Complete|","9.0","" "`P0758R1 `__","Implicit conversion traits and utility functions","2018-06 (Rapperswil)","|Complete|","","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index e84a55e25f2fa4..87eaf64b245017 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -923,6 +923,7 @@ set(files coroutine csetjmp csignal + cstdalign cstdarg cstdbool cstddef diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 1f38f5d2d99b43..ab3c598418828a 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -42,6 +42,9 @@ struct __less { } }; +template +inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; diff --git a/libcxx/include/__algorithm/ranges_minmax.h b/libcxx/include/__algorithm/ranges_minmax.h index 4f2b2bf26382da..5f2e5cb2a1eeab 100644 --- a/libcxx/include/__algorithm/ranges_minmax.h +++ b/libcxx/include/__algorithm/ranges_minmax.h @@ -89,7 +89,7 @@ struct __minmax { // vectorize the code. if constexpr (contiguous_range<_Range> && is_integral_v<_ValueT> && __is_cheap_to_copy<_ValueT> & __is_identity<_Proj>::value && - __desugars_to_v<__totally_ordered_less_tag, _Comp, _ValueT, _ValueT>) { + __desugars_to_v<__less_tag, _Comp, _ValueT, _ValueT>) { minmax_result<_ValueT> __result = {__r[0], __r[0]}; for (auto __e : __r) { if (__e < __result.min) diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h index 0b2137dee2f77e..39868b8b6a30ae 100644 --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -27,11 +27,13 @@ #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> +#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -47,110 +49,11 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - _Ops::iter_swap(__y, __z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - _Ops::iter_swap(__x, __y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - _Ops::iter_swap(__x, __z); // x < y && y < z - __r = 1; - return __r; - } - _Ops::iter_swap(__x, __y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - _Ops::iter_swap(__y, __z); // x <= y && y < z - __r = 2; - } - return __r; -} // x <= y && y <= z - -// stable, 3-6 compares, 0-5 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__c(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__c(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } -} - -// stable, 4-10 compares, 0-9 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort5(_ForwardIterator __x1, - _ForwardIterator __x2, - _ForwardIterator __x3, - _ForwardIterator __x4, - _ForwardIterator __x5, - _Comp __comp) { - using _Ops = _IterOps<_AlgPolicy>; - - std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp); - if (__comp(*__x5, *__x4)) { - _Ops::iter_swap(__x4, __x5); - if (__comp(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__comp(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__comp(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } - } -} - -// The comparator being simple is a prerequisite for using the branchless optimization. -template -struct __is_simple_comparator : false_type {}; -template <> -struct __is_simple_comparator<__less<>&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -#if _LIBCPP_STD_VER >= 20 -template <> -struct __is_simple_comparator : true_type {}; -template <> -struct __is_simple_comparator : true_type {}; -#endif - template ::value_type> -using __use_branchless_sort = - integral_constant::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; +inline const bool __use_branchless_sort = + __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && + (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || + __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); namespace __detail { @@ -161,59 +64,88 @@ enum { __block_size = sizeof(uint64_t) * 8 }; // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template -inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; *__y = __r ? *__y : *__x; *__x = __tmp; + return !__r; } // Ensures that *__x, *__y and *__z are ordered according to the comparator __c, // under the assumption that *__y and *__z are already ordered. template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; - bool __r = __c(*__z, *__x); - value_type __tmp = __r ? *__z : *__x; - *__z = __r ? *__x : *__z; - __r = __c(__tmp, *__y); - *__x = __r ? *__x : *__y; - *__y = __r ? *__y : __tmp; + bool __r1 = __c(*__z, *__x); + value_type __tmp = __r1 ? *__z : *__x; + *__z = __r1 ? *__x : *__z; + bool __r2 = __c(__tmp, *__y); + *__x = __r2 ? *__x : *__y; + *__y = __r2 ? *__y : __tmp; + return !__r1 || !__r2; } +// stable, 2-3 compares, 0-2 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__cond_swap<_Compare>(__x2, __x3, __c); - std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { + bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c); + bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + return __swapped1 || __swapped2; } template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); -} + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return false; // x <= y && y <= z + // x <= y && y > z + _Ops::iter_swap(__y, __z); // x <= z && y < z + if (__c(*__y, *__x)) // if x > y + _Ops::iter_swap(__x, __y); // x < y && y <= z + return true; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + _Ops::iter_swap(__x, __z); // x < y && y < z + return true; + } + _Ops::iter_swap(__x, __y); // x > y && y <= z + // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + _Ops::iter_swap(__y, __z); // x <= y && y < z + return true; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x3, __c); std::__cond_swap<_Compare>(__x2, __x4, __c); std::__cond_swap<_Compare>(__x1, __x2, __c); @@ -224,27 +156,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { - std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__c(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__c(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } } +// stable, 4-10 compares, 0-9 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -256,16 +200,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { - std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>( - std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __comp); + if (__comp(*__x5, *__x4)) { + _Ops::iter_swap(__x4, __x5); + if (__comp(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__comp(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__comp(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } + } } // Assumes size > 0 @@ -355,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator _Ops::iter_swap(__first, __last); return true; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort4<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort5<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), @@ -373,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { @@ -780,14 +737,14 @@ void __introsort(_RandomAccessIterator __first, _Ops::iter_swap(__first, __last); return; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort4<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort5<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), @@ -928,10 +885,8 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no // branches in the comparator. - std::__introsort<_AlgPolicy, - _Comp&, - _RandomAccessIterator, - __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >( + __first, __last, __comp, __depth_limit); } template diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 6022bd679ed3e3..67d9da289aead3 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -362,6 +362,9 @@ struct _LIBCPP_TEMPLATE_VIS less : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); +template +inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value; @@ -377,6 +380,9 @@ struct _LIBCPP_TEMPLATE_VIS less { typedef void is_transparent; }; +template +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true; + template inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value; #endif @@ -446,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater); +template +inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS greater { @@ -457,6 +466,9 @@ struct _LIBCPP_TEMPLATE_VIS greater { } typedef void is_transparent; }; + +template +inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true; #endif // Logical operations diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index f023d765a6c8ab..df95843e7c9af6 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -102,6 +102,12 @@ inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = tru template inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true; +template +inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; + +template +inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__type_traits/desugars_to.h b/libcxx/include/__type_traits/desugars_to.h index b0ce7c414e5d77..452c70bfbad66d 100644 --- a/libcxx/include/__type_traits/desugars_to.h +++ b/libcxx/include/__type_traits/desugars_to.h @@ -25,6 +25,12 @@ struct __equal_tag {}; // syntactically, the operation is equivalent to calling `a + b` struct __plus_tag {}; +// syntactically, the operation is equivalent to calling `a < b` +struct __less_tag {}; + +// syntactically, the operation is equivalent to calling `a > b` +struct __greater_tag {}; + // syntactically, the operation is equivalent to calling `a < b`, and these expressions // have to be true for any `a` and `b`: // - `(a < b) == (b > a)` diff --git a/libcxx/include/__type_traits/is_trivially_copyable.h b/libcxx/include/__type_traits/is_trivially_copyable.h index e92af126ee94d9..8eb3ba7581af15 100644 --- a/libcxx/include/__type_traits/is_trivially_copyable.h +++ b/libcxx/include/__type_traits/is_trivially_copyable.h @@ -27,10 +27,8 @@ template inline constexpr bool is_trivially_copyable_v = __is_trivially_copyable(_Tp); #endif -#if _LIBCPP_STD_VER >= 20 template -inline constexpr bool __is_cheap_to_copy = is_trivially_copyable_v<_Tp> && sizeof(_Tp) <= sizeof(std::intmax_t); -#endif +inline const bool __is_cheap_to_copy = __is_trivially_copyable(_Tp) && sizeof(_Tp) <= sizeof(std::intmax_t); _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/ccomplex b/libcxx/include/ccomplex index 94d2c8d7d003d4..d379c9e7f0174a 100644 --- a/libcxx/include/ccomplex +++ b/libcxx/include/ccomplex @@ -23,4 +23,16 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ccomplex _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") = void; +using __use_standard_header_ccomplex = __standard_header_ccomplex; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_ccomplex _LIBCPP_DEPRECATED_("Include instead.") = void; +using __use_standard_header_ccomplex = __standard_header_ccomplex; + +#endif + #endif // _LIBCPP_CCOMPLEX diff --git a/libcxx/include/ciso646 b/libcxx/include/ciso646 index 1d859f08fac572..5fcac79e38a7f2 100644 --- a/libcxx/include/ciso646 +++ b/libcxx/include/ciso646 @@ -21,4 +21,11 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ciso646 _LIBCPP_DEPRECATED_("removed in C++20. Include instead.") = void; +using __use_standard_header_ciso646 = __standard_header_ciso646; + +#endif + #endif // _LIBCPP_CISO646 diff --git a/libcxx/include/complex b/libcxx/include/complex index 4030d96b003d56..15e42800fbfa0a 100644 --- a/libcxx/include/complex +++ b/libcxx/include/complex @@ -1097,20 +1097,20 @@ inline _LIBCPP_HIDE_FROM_ABI complex<_Tp> pow(const complex<_Tp>& __x, const com return std::exp(__y * std::log(__x)); } -template +template ::value && is_floating_point<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const complex<_Tp>& __x, const complex<_Up>& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); } -template ::value, int> = 0> +template ::value && is_arithmetic<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const complex<_Tp>& __x, const _Up& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); } -template ::value, int> = 0> +template ::value && is_floating_point<_Up>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI complex::type> pow(const _Tp& __x, const complex<_Up>& __y) { typedef complex::type> result_type; return std::pow(result_type(__x), result_type(__y)); diff --git a/libcxx/include/complex.h b/libcxx/include/complex.h index a3da21c843f365..89595ae2068a64 100644 --- a/libcxx/include/complex.h +++ b/libcxx/include/complex.h @@ -24,7 +24,7 @@ #endif #ifdef __cplusplus -# include +# include #elif __has_include_next() # include_next #endif diff --git a/libcxx/include/cstdalign b/libcxx/include/cstdalign new file mode 100644 index 00000000000000..e6a2a3c7177422 --- /dev/null +++ b/libcxx/include/cstdalign @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_CSTDALIGN +#define _LIBCPP_CSTDALIGN + +/* + cstdalign synopsis + +Macros: + + __alignas_is_defined + __alignof_is_defined + +*/ + +#include <__config> + +// is not provided by libc++ +#if __has_include() +# include +# ifdef _LIBCPP_STDALIGN_H +# error "If libc++ starts defining , the __has_include check should move to libc++'s " +# endif +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#undef __alignas_is_defined +#define __alignas_is_defined 1 + +#undef __alignof_is_defined +#define __alignof_is_defined 1 + +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_cstdalign _LIBCPP_DEPRECATED_("removed in C++20.") = void; +using __use_standard_header_cstdalign = __standard_header_cstdalign; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_cstdalign _LIBCPP_DEPRECATED = void; +using __use_standard_header_cstdalign = __standard_header_cstdalign; + +#endif + +#endif // _LIBCPP_CSTDALIGN diff --git a/libcxx/include/cstdbool b/libcxx/include/cstdbool index ef731c021a4ab8..1d627258e10c09 100644 --- a/libcxx/include/cstdbool +++ b/libcxx/include/cstdbool @@ -28,4 +28,16 @@ Macros: #undef __bool_true_false_are_defined #define __bool_true_false_are_defined 1 +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_cstdbool _LIBCPP_DEPRECATED_("removed in C++20.") = void; +using __use_standard_header_cstdbool = __standard_header_cstdbool; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_cstdbool _LIBCPP_DEPRECATED = void; +using __use_standard_header_cstdbool = __standard_header_cstdbool; + +#endif + #endif // _LIBCPP_CSTDBOOL diff --git a/libcxx/include/ctgmath b/libcxx/include/ctgmath index 6237979be4906c..7dbe952f021b74 100644 --- a/libcxx/include/ctgmath +++ b/libcxx/include/ctgmath @@ -18,11 +18,23 @@ */ -#include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 20 + +using __standard_header_ctgmath _LIBCPP_DEPRECATED_("removed in C++20. Include and instead.") = void; +using __use_standard_header_ctgmath = __standard_header_ctgmath; + +#elif _LIBCPP_STD_VER >= 17 + +using __standard_header_ctgmath _LIBCPP_DEPRECATED_("Include and instead.") = void; +using __use_standard_header_ctgmath = __standard_header_ctgmath; + +#endif + #endif // _LIBCPP_CTGMATH diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index c3d08000731992..af8c3c15eb2767 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1109,6 +1109,11 @@ module std [system] { export * } + module cstdalign { + header "cstdalign" + export * + } + module cstdarg { header "cstdarg" export * diff --git a/libcxx/include/tgmath.h b/libcxx/include/tgmath.h index e6f0a4ab2611fa..1c5058cb065a91 100644 --- a/libcxx/include/tgmath.h +++ b/libcxx/include/tgmath.h @@ -24,7 +24,8 @@ #endif #ifdef __cplusplus -# include +# include +# include #else # if __has_include_next() # include_next diff --git a/libcxx/src/algorithm.cpp b/libcxx/src/algorithm.cpp index af9d60a8e271e8..a7c39b5e5183a4 100644 --- a/libcxx/src/algorithm.cpp +++ b/libcxx/src/algorithm.cpp @@ -21,8 +21,7 @@ void __sort(RandomAccessIterator first, RandomAccessIterator last, Comp comp) { std::__introsort<_ClassicAlgPolicy, ranges::less, RandomAccessIterator, - __use_branchless_sort::value>( - first, last, ranges::less{}, depth_limit); + __use_branchless_sort>(first, last, ranges::less{}, depth_limit); } // clang-format off diff --git a/libcxx/test/libcxx/clang_modules_include.gen.py b/libcxx/test/libcxx/clang_modules_include.gen.py index bc028f2a0809aa..b897984f898819 100644 --- a/libcxx/test/libcxx/clang_modules_include.gen.py +++ b/libcxx/test/libcxx/clang_modules_include.gen.py @@ -17,10 +17,15 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp // RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only @@ -41,9 +46,11 @@ // UNSUPPORTED: LIBCXX-PICOLIBC-FIXME {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> -""") +""" + ) print( f"""\ diff --git a/libcxx/test/libcxx/double_include.gen.py b/libcxx/test/libcxx/double_include.gen.py index afc2947dbece94..f58e72f94a3533 100644 --- a/libcxx/test/libcxx/double_include.gen.py +++ b/libcxx/test/libcxx/double_include.gen.py @@ -15,12 +15,18 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.sh.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} // RUN: %{{cxx}} -c %s -o %t.first.o %{{flags}} %{{compile_flags}} // RUN: %{{cxx}} -c %s -o %t.second.o -DWITH_MAIN %{{flags}} %{{compile_flags}} @@ -32,4 +38,5 @@ #if defined(WITH_MAIN) int main(int, char**) {{ return 0; }} #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/header_inclusions.gen.py b/libcxx/test/libcxx/header_inclusions.gen.py index e5def1ad4cb70d..739caf915c09a0 100644 --- a/libcxx/test/libcxx/header_inclusions.gen.py +++ b/libcxx/test/libcxx/header_inclusions.gen.py @@ -12,32 +12,43 @@ # RUN: %{python} %s %{libcxx-dir}/utils import sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, + mandatory_inclusions, +) for header in public_headers: - header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}" + header_guard = ( + lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}" + ) - # has no header guards - if header == 'cassert': - checks = '' - else: - checks = f''' + # has no header guards + if header == "cassert": + checks = "" + else: + checks = f""" #ifndef {header_guard(header)} # error <{header}> was expected to define a header guard {header_guard(header)} #endif -''' - for includee in mandatory_inclusions.get(header, []): - checks += f''' +""" + for includee in mandatory_inclusions.get(header, []): + checks += f""" #ifndef {header_guard(includee)} # error <{header}> was expected to include <{includee}> #endif -''' +""" - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> {checks} -""") +""" + ) diff --git a/libcxx/test/libcxx/include_as_c.sh.cpp b/libcxx/test/libcxx/include_as_c.sh.cpp index c9f8dfd9a5a922..204b830462cf94 100644 --- a/libcxx/test/libcxx/include_as_c.sh.cpp +++ b/libcxx/test/libcxx/include_as_c.sh.cpp @@ -34,6 +34,7 @@ #endif #include #include +#include #include #include #include diff --git a/libcxx/test/libcxx/libcpp_version.gen.py b/libcxx/test/libcxx/libcpp_version.gen.py index a9995295e21e4f..b30623fe2c388b 100644 --- a/libcxx/test/libcxx/libcpp_version.gen.py +++ b/libcxx/test/libcxx/libcpp_version.gen.py @@ -12,16 +12,23 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> #ifndef _LIBCPP_VERSION # error <{header}> does not seem to define _LIBCPP_VERSION #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/no_assert_include.gen.py b/libcxx/test/libcxx/no_assert_include.gen.py index 67ab98603ca8fd..e0dbc3d815f31b 100644 --- a/libcxx/test/libcxx/no_assert_include.gen.py +++ b/libcxx/test/libcxx/no_assert_include.gen.py @@ -12,20 +12,28 @@ # RUN: %{python} %s %{libcxx-dir}/utils import sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: - if header == 'cassert': - continue + if header == "cassert": + continue - print(f"""\ + print( + f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #include <{header}> #ifdef assert # error "Do not include cassert or assert.h in standard header files" #endif -""") +""" + ) diff --git a/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp new file mode 100644 index 00000000000000..1c790c283e4387 --- /dev/null +++ b/libcxx/test/libcxx/numerics/complex.number/cmplx.over.pow.pass.cpp @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// template complex<__promote::type> pow(const complex&, const U&); +// template complex<__promote::type> pow(const complex&, const complex&); +// template complex<__promote::type> pow(const T&, const complex&); + +// Test that these additional overloads are free from catching std::complex, +// which is expected by several 3rd party libraries, see https://github.com/llvm/llvm-project/issues/109858. +// +// Note that we reserve the right to break this in the future if we have a reason to, but for the time being, +// make sure we don't break this property unintentionally. +#include +#include +#include +#include + +#include "test_macros.h" + +namespace usr { +struct usr_tag {}; + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const T&, const std::complex&) { + return std::is_same::value ? 0 : 1; +} + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const std::complex&, const U&) { + return std::is_same::value ? 2 : 3; +} + +template +typename std::enable_if<(std::is_same::value && std::is_floating_point::value) || + (std::is_floating_point::value && std::is_same::value), + int>::type +pow(const std::complex&, const std::complex&) { + return std::is_same::value ? 4 : 5; +} +} // namespace usr + +int main(int, char**) { + using std::pow; + using usr::pow; + + usr::usr_tag tag; + const std::complex ctag; + + assert(pow(tag, std::complex(1.0f)) == 0); + assert(pow(std::complex(1.0f), tag) == 2); + assert(pow(tag, std::complex(1.0)) == 0); + assert(pow(std::complex(1.0), tag) == 2); + assert(pow(tag, std::complex(1.0l)) == 0); + assert(pow(std::complex(1.0l), tag) == 2); + + assert(pow(1.0f, ctag) == 1); + assert(pow(ctag, 1.0f) == 3); + assert(pow(1.0, ctag) == 1); + assert(pow(ctag, 1.0) == 3); + assert(pow(1.0l, ctag) == 1); + assert(pow(ctag, 1.0l) == 3); + + assert(pow(ctag, std::complex(1.0f)) == 4); + assert(pow(std::complex(1.0f), ctag) == 5); + assert(pow(ctag, std::complex(1.0)) == 4); + assert(pow(std::complex(1.0), ctag) == 5); + assert(pow(ctag, std::complex(1.0l)) == 4); + assert(pow(std::complex(1.0l), ctag) == 5); + + return 0; +} diff --git a/libcxx/test/libcxx/system_reserved_names.gen.py b/libcxx/test/libcxx/system_reserved_names.gen.py index e29e7a2cdd6144..f01126249c8817 100644 --- a/libcxx/test/libcxx/system_reserved_names.gen.py +++ b/libcxx/test/libcxx/system_reserved_names.gen.py @@ -13,14 +13,20 @@ # RUN: %{python} %s %{libcxx-dir}/utils import sys + sys.path.append(sys.argv[1]) -from libcxx.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import ( + lit_header_restrictions, + lit_header_undeprecations, + public_headers, +) for header in public_headers: print( f"""\ //--- {header}.compile.pass.cpp {lit_header_restrictions.get(header, '')} +{lit_header_undeprecations.get(header, '')} #define SYSTEM_RESERVED_NAME This name should not be used in libc++ diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 2dc84963f0891e..48c501863cb76c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 2dc84963f0891e..48c501863cb76c 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index 27e22975573584..6191c9012c631b 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -467,7 +467,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index b17eb1f2347a86..5d46162e3f8996 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -458,7 +458,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 9efec327889c1d..20fe9878ce3eae 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -446,7 +446,6 @@ ctgmath array ctgmath atomic ctgmath bit ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index e17f732663a9b2..5ee89ec307cc29 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -238,7 +238,6 @@ coroutine limits coroutine version cstddef version ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index c56f5cdfad0072..ee17223e66bee4 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -238,7 +238,6 @@ coroutine limits coroutine version cstddef version ctgmath bitset -ctgmath ccomplex ctgmath cctype ctgmath cerrno ctgmath climits diff --git a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp index 4dff57f84f202c..764f4d02f44f44 100644 --- a/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/ciso646.compile.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// -// +// // removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp new file mode 100644 index 00000000000000..e7290aab2c6616 --- /dev/null +++ b/libcxx/test/std/depr/depr.c.headers/stdalign_h.compile.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// test +// +// Even though is not provided by libc++, +// we still test that using it with libc++ on the search path will work. + +// TODO: GCC doesn't provide a proper for C++ until 15. +// UNSUPPORTED: gcc + +#include + +#ifndef __alignas_is_defined +# error __alignas_is_defined not defined +#endif + +#ifndef __alignof_is_defined +# error __alignof_is_defined not defined +#endif + +#ifdef alignas +# error alignas should not be defined +#endif + +#ifdef alignof +# error alignof should not be defined +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp new file mode 100644 index 00000000000000..0eaf82ce5cef01 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ccomplex.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: removed in C++20. Include instead.}} +#else +// expected-warning@ccomplex:* {{'__standard_header_ccomplex' is deprecated: Include instead.}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp new file mode 100644 index 00000000000000..04acd100815485 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ciso646.verify.cpp @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: clang-modules-build + +#include +// expected-warning@ciso646:* {{'__standard_header_ciso646' is deprecated: removed in C++20. Include instead.}} diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp new file mode 100644 index 00000000000000..dc9f1af55b3f14 --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdalign.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated: removed in C++20.}} +#else +// expected-warning@cstdalign:* {{'__standard_header_cstdalign' is deprecated}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp new file mode 100644 index 00000000000000..eddefe14d35eac --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/cstdbool.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated: removed in C++20.}} +#else +// expected-warning@cstdbool:* {{'__standard_header_cstdbool' is deprecated}} +#endif diff --git a/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp new file mode 100644 index 00000000000000..097ab1643d15af --- /dev/null +++ b/libcxx/test/std/depr/depr.cpp.headers/ctgmath.verify.cpp @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// check that is deprecated in C++17 and removed in C++20 +// When built with modules, should be omitted. + +// UNSUPPORTED: c++03, c++11, c++14 +// UNSUPPORTED: clang-modules-build + +#include "test_macros.h" + +#include + +#if TEST_STD_VER >= 20 +// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: removed in C++20. Include and instead.}} +#else +// expected-warning@ctgmath:* {{'__standard_header_ctgmath' is deprecated: Include and instead.}} +#endif diff --git a/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp new file mode 100644 index 00000000000000..fbbaf9b2d136f9 --- /dev/null +++ b/libcxx/test/std/language.support/support.runtime/cstdalign.compile.pass.cpp @@ -0,0 +1,29 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// test // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS + +#include + +#ifndef __alignas_is_defined +# error __alignas_is_defined not defined +#endif + +#ifndef __alignof_is_defined +# error __alignof_is_defined not defined +#endif + +#ifdef alignas +# error alignas should not be defined +#endif + +#ifdef alignof +# error alignof should not be defined +#endif diff --git a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp index 1d0e9b06a43d2e..9a35eea507c40a 100644 --- a/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/cstdbool.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// test +// test // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp index 2c8d054fbc527d..2e4679980577a9 100644 --- a/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp +++ b/libcxx/test/std/numerics/c.math/ctgmath.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// +// // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp index cc3f8cd6a9beb9..0ed116c6410639 100644 --- a/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp +++ b/libcxx/test/std/numerics/complex.number/ccmplx/ccomplex.pass.cpp @@ -6,7 +6,9 @@ // //===----------------------------------------------------------------------===// -// +// // deprecated in C++17, removed in C++20, but still provided by libc++ as an extension + +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS #include diff --git a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp index 1203b2f3ec18f9..00f9e2b8467837 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp @@ -34,15 +34,32 @@ struct test_alloc { typedef test_alloc other; }; - TEST_CONSTEXPR_CXX14 pointer allocate(size_type n, const void* = nullptr) { + TEST_CONSTEXPR test_alloc() TEST_NOEXCEPT {} + + template + TEST_CONSTEXPR test_alloc(const test_alloc&) TEST_NOEXCEPT {} + + pointer allocate(size_type n, const void* = nullptr) { allocated_ += n; return std::allocator().allocate(n); } - TEST_CONSTEXPR_CXX14 void deallocate(pointer p, size_type s) { + void deallocate(pointer p, size_type s) { allocated_ -= s; std::allocator().deallocate(p, s); } + + template + friend TEST_CONSTEXPR bool operator==(const test_alloc&, const test_alloc&) TEST_NOEXCEPT { + return true; + } + +#if TEST_STD_VER < 20 + template + friend TEST_CONSTEXPR bool operator!=(const test_alloc&, const test_alloc&) TEST_NOEXCEPT { + return false; + } +#endif }; template diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index 3b12dcb9f56c0b..cac620e4f1fe77 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -66,6 +66,7 @@ def is_cstd(self) -> bool: "cmath", "csetjmp", "csignal", + "cstdalign", "cstdarg", "cstdbool", "cstddef", @@ -92,7 +93,7 @@ def has_cxx20_module(self) -> bool: experimental headers. """ # These headers have been removed in C++20 so are never part of a module. - removed_in_20 = ["ccomplex", "ciso646", "cstdbool", "ctgmath"] + removed_in_20 = ["ccomplex", "ciso646", "cstdalign", "cstdbool", "ctgmath"] return self.is_public() and not self.is_experimental() and not self.is_C_compatibility() and not self._name in removed_in_20 def is_cxx03_frozen_header(self) -> bool: @@ -236,6 +237,15 @@ def __hash__(self) -> int: "wctype.h": "// UNSUPPORTED: no-wide-characters", } +# Undeprecate headers that are deprecated in C++17 and removed in C++20. +lit_header_undeprecations = { + "ccomplex": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "ciso646": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "cstdalign": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "cstdbool": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", + "ctgmath": "// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS", +} + # This table was produced manually, by grepping the TeX source of the Standard's # library clauses for the string "#include". Each header's synopsis contains # explicit "#include" directives for its mandatory inclusions. diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index c80c4017d3512c..ebe77204264210 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -720,7 +720,7 @@ def: J<"plugin-opt=opt-remarks-format=">, HelpText<"Alias for --opt-remarks-format">; def: F<"plugin-opt=opt-remarks-with-hotness">, Alias, - HelpText<"Alias for --opt-remarks-with_hotness">; + HelpText<"Alias for --opt-remarks-with-hotness">; def: J<"plugin-opt=opt-remarks-hotness-threshold=">, Alias, HelpText<"Alias for --opt-remarks-hotness-threshold">; diff --git a/lldb/include/lldb/Breakpoint/BreakpointLocation.h b/lldb/include/lldb/Breakpoint/BreakpointLocation.h index cca00335bc3c67..3592291bb2d06e 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointLocation.h +++ b/lldb/include/lldb/Breakpoint/BreakpointLocation.h @@ -11,10 +11,12 @@ #include #include +#include #include "lldb/Breakpoint/BreakpointOptions.h" #include "lldb/Breakpoint/StoppointHitCounter.h" #include "lldb/Core/Address.h" +#include "lldb/Symbol/LineEntry.h" #include "lldb/Utility/UserID.h" #include "lldb/lldb-private.h" @@ -282,6 +284,25 @@ class BreakpointLocation /// Returns the breakpoint location ID. lldb::break_id_t GetID() const { return m_loc_id; } + /// Set the line entry that should be shown to users for this location. + /// It is up to the caller to verify that this is a valid entry to show. + /// The current use of this is to distinguish among line entries from a + /// virtual inlined call stack that all share the same address. + /// The line entry must have the same start address as the address for this + /// location. + bool SetPreferredLineEntry(const LineEntry &line_entry) { + if (m_address == line_entry.range.GetBaseAddress()) { + m_preferred_line_entry = line_entry; + return true; + } + assert(0 && "Tried to set a preferred line entry with a different address"); + return false; + } + + const std::optional GetPreferredLineEntry() { + return m_preferred_line_entry; + } + protected: friend class BreakpointSite; friend class BreakpointLocationList; @@ -306,6 +327,16 @@ class BreakpointLocation /// If it returns false we should continue, otherwise stop. bool IgnoreCountShouldStop(); + /// If this location knows that the virtual stack frame it represents is + /// not frame 0, return the suggested stack frame instead. This will happen + /// when the location's address contains a "virtual inlined call stack" and + /// the breakpoint was set on a file & line that are not at the bottom of that + /// stack. For now we key off the "preferred line entry" - looking for that + /// in the blocks that start with the stop PC. + /// This version of the API doesn't take an "inlined" parameter because it + /// only changes frames in the inline stack. + std::optional GetSuggestedStackFrameIndex(); + private: void SwapLocation(lldb::BreakpointLocationSP swap_from); @@ -369,6 +400,11 @@ class BreakpointLocation lldb::break_id_t m_loc_id; ///< Breakpoint location ID. StoppointHitCounter m_hit_counter; ///< Number of times this breakpoint /// location has been hit. + /// If this exists, use it to print the stop description rather than the + /// LineEntry m_address resolves to directly. Use this for instance when the + /// location was given somewhere in the virtual inlined call stack since the + /// Address always resolves to the lowest entry in the stack. + std::optional m_preferred_line_entry; void SetShouldResolveIndirectFunctions(bool do_resolve) { m_should_resolve_indirect_functions = do_resolve; diff --git a/lldb/include/lldb/Breakpoint/BreakpointSite.h b/lldb/include/lldb/Breakpoint/BreakpointSite.h index 17b76d51c1ae53..7b3f7be23639f2 100644 --- a/lldb/include/lldb/Breakpoint/BreakpointSite.h +++ b/lldb/include/lldb/Breakpoint/BreakpointSite.h @@ -170,6 +170,11 @@ class BreakpointSite : public std::enable_shared_from_this, /// \see lldb::DescriptionLevel void GetDescription(Stream *s, lldb::DescriptionLevel level); + // This runs through all the breakpoint locations owning this site and returns + // the greatest of their suggested stack frame indexes. This only handles + // inlined stack changes. + std::optional GetSuggestedStackFrameIndex(); + /// Tell whether a breakpoint has a location at this site. /// /// \param[in] bp_id diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index 4a0e9047b54695..c864b88c6b32a3 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -84,10 +84,14 @@ class Declaration { /// \param[in] declaration /// The const Declaration object to compare with. /// + /// \param[in] full + /// Same meaning as Full in FileSpec::Equal. True means an empty + /// directory is not equal to a specified one, false means it is equal. + /// /// \return /// Returns \b true if \b declaration is at the same file and /// line, \b false otherwise. - bool FileAndLineEqual(const Declaration &declaration) const; + bool FileAndLineEqual(const Declaration &declaration, bool full) const; /// Dump a description of this object to a Stream. /// diff --git a/lldb/include/lldb/Host/Editline.h b/lldb/include/lldb/Host/Editline.h index a02f90891599ad..57e2c831e3499d 100644 --- a/lldb/include/lldb/Host/Editline.h +++ b/lldb/include/lldb/Host/Editline.h @@ -30,9 +30,6 @@ #include "lldb/Host/Config.h" -#if LLDB_EDITLINE_USE_WCHAR -#include -#endif #include #include #include @@ -57,23 +54,6 @@ #include "llvm/ADT/FunctionExtras.h" -#if defined(__clang__) && defined(__has_warning) -#if __has_warning("-Wdeprecated-declarations") -#define LLDB_DEPRECATED_WARNING_DISABLE \ - _Pragma("clang diagnostic push") \ - _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("clang diagnostic pop") -#endif -#elif defined(__GNUC__) && __GNUC__ > 6 -#define LLDB_DEPRECATED_WARNING_DISABLE \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#define LLDB_DEPRECATED_WARNING_RESTORE _Pragma("GCC diagnostic pop") -#else -#define LLDB_DEPRECATED_WARNING_DISABLE -#define LLDB_DEPRECATED_WARNING_RESTORE -#endif - namespace lldb_private { namespace line_editor { @@ -383,11 +363,6 @@ class Editline { void SetEditLinePromptCallback(EditlinePromptCallbackType callbackFn); void SetGetCharacterFunction(EditlineGetCharCallbackType callbackFn); -#if LLDB_EDITLINE_USE_WCHAR - LLDB_DEPRECATED_WARNING_DISABLE - std::wstring_convert> m_utf8conv; - LLDB_DEPRECATED_WARNING_RESTORE -#endif ::EditLine *m_editline = nullptr; EditlineHistorySP m_history_sp; bool m_in_history = false; diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index fae90364deaf0a..45beac129e86f7 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -77,6 +77,18 @@ class StopInfo : public std::enable_shared_from_this { m_description.clear(); } + /// This gives the StopInfo a chance to suggest a stack frame to select. + /// Passing true for inlined_stack will request changes to the inlined + /// call stack. Passing false will request changes to the real stack + /// frame. The inlined stack gets adjusted before we call into the thread + /// plans so they can reason based on the correct values. The real stack + /// adjustment is handled after the frame recognizers get a chance to adjust + /// the frame. + virtual std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) { + return {}; + } + virtual bool IsValidForOperatingSystemThread(Thread &thread) { return true; } /// A Continue operation can result in a false stop event diff --git a/lldb/include/lldb/Target/ThreadPlanStepInRange.h b/lldb/include/lldb/Target/ThreadPlanStepInRange.h index f9ef87942a7c03..9da8370ef1c925 100644 --- a/lldb/include/lldb/Target/ThreadPlanStepInRange.h +++ b/lldb/include/lldb/Target/ThreadPlanStepInRange.h @@ -80,8 +80,8 @@ class ThreadPlanStepInRange : public ThreadPlanStepRange, bool m_step_past_prologue; // FIXME: For now hard-coded to true, we could put // a switch in for this if there's // demand for that. - bool m_virtual_step; // true if we've just done a "virtual step", i.e. just - // moved the inline stack depth. + LazyBool m_virtual_step; // true if we've just done a "virtual step", i.e. + // just moved the inline stack depth. ConstString m_step_into_target; ThreadPlanStepInRange(const ThreadPlanStepInRange &) = delete; const ThreadPlanStepInRange & diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index 1bacd74a968c31..bcc179346836d1 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -46,6 +46,10 @@ make_path = None # The overriden dwarf verison. +# Don't use this to test the current compiler's +# DWARF version, as this won't be set if the +# version isn't overridden. +# Use lldbplatformutils.getDwarfVersion() instead. dwarf_version = 0 # Any overridden settings. diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index 63748a71f1122d..c29992ce9c7848 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -793,8 +793,6 @@ def request_launch( args_dict["env"] = env if stopOnEntry: args_dict["stopOnEntry"] = stopOnEntry - if disableASLR: - args_dict["disableASLR"] = disableASLR if disableSTDIO: args_dict["disableSTDIO"] = disableSTDIO if shellExpandArguments: @@ -829,6 +827,7 @@ def request_launch( if customThreadFormat: args_dict["customThreadFormat"] = customThreadFormat + args_dict["disableASLR"] = disableASLR args_dict["enableAutoVariableSummaries"] = enableAutoVariableSummaries args_dict["enableSyntheticChildDebugging"] = enableSyntheticChildDebugging args_dict["displayExtendedBacktrace"] = displayExtendedBacktrace diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index ad9057c8141e99..c7ea50407ae1c7 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -508,8 +508,20 @@ void BreakpointLocation::GetDescription(Stream *s, s->PutCString("re-exported target = "); else s->PutCString("where = "); + + // If there's a preferred line entry for printing, use that. + bool show_function_info = true; + if (auto preferred = GetPreferredLineEntry()) { + sc.line_entry = *preferred; + // FIXME: We're going to get the function name wrong when the preferred + // line entry is not the lowest one. For now, just leave the function + // out in this case, but we really should also figure out how to easily + // fake the function name here. + show_function_info = false; + } sc.DumpStopContext(s, m_owner.GetTarget().GetProcessSP().get(), m_address, - false, true, false, true, true, true); + false, true, false, show_function_info, + show_function_info, show_function_info); } else { if (sc.module_sp) { s->EOL(); @@ -537,7 +549,10 @@ void BreakpointLocation::GetDescription(Stream *s, if (sc.line_entry.line > 0) { s->EOL(); s->Indent("location = "); - sc.line_entry.DumpStopContext(s, true); + if (auto preferred = GetPreferredLineEntry()) + preferred->DumpStopContext(s, true); + else + sc.line_entry.DumpStopContext(s, true); } } else { @@ -656,6 +671,50 @@ void BreakpointLocation::SendBreakpointLocationChangedEvent( } } +std::optional BreakpointLocation::GetSuggestedStackFrameIndex() { + auto preferred_opt = GetPreferredLineEntry(); + if (!preferred_opt) + return {}; + LineEntry preferred = *preferred_opt; + SymbolContext sc; + if (!m_address.CalculateSymbolContext(&sc)) + return {}; + // Don't return anything special if frame 0 is the preferred line entry. + // We not really telling the stack frame list to do anything special in that + // case. + if (!LineEntry::Compare(sc.line_entry, preferred)) + return {}; + + if (!sc.block) + return {}; + + // Blocks have their line info in Declaration form, so make one here: + Declaration preferred_decl(preferred.GetFile(), preferred.line, + preferred.column); + + uint32_t depth = 0; + Block *inlined_block = sc.block->GetContainingInlinedBlock(); + while (inlined_block) { + // If we've moved to a block that this isn't the start of, that's not + // our inlining info or call site, so we can stop here. + Address start_address; + if (!inlined_block->GetStartAddress(start_address) || + start_address != m_address) + return {}; + + const InlineFunctionInfo *info = inlined_block->GetInlinedFunctionInfo(); + if (info) { + if (preferred_decl == info->GetDeclaration()) + return depth; + if (preferred_decl == info->GetCallSite()) + return depth + 1; + } + inlined_block = inlined_block->GetInlinedParent(); + depth++; + } + return {}; +} + void BreakpointLocation::SwapLocation(BreakpointLocationSP swap_from) { m_address = swap_from->m_address; m_should_resolve_indirect_functions = diff --git a/lldb/source/Breakpoint/BreakpointResolver.cpp b/lldb/source/Breakpoint/BreakpointResolver.cpp index 8307689c7640cf..9643602d78c751 100644 --- a/lldb/source/Breakpoint/BreakpointResolver.cpp +++ b/lldb/source/Breakpoint/BreakpointResolver.cpp @@ -340,6 +340,21 @@ void BreakpointResolver::AddLocation(SearchFilter &filter, } BreakpointLocationSP bp_loc_sp(AddLocation(line_start)); + // If the address that we resolved the location to returns a different + // LineEntry from the one in the incoming SC, we're probably dealing with an + // inlined call site, so set that as the preferred LineEntry: + LineEntry resolved_entry; + if (!skipped_prologue && bp_loc_sp && + line_start.CalculateSymbolContextLineEntry(resolved_entry) && + LineEntry::Compare(resolved_entry, sc.line_entry)) { + // FIXME: The function name will also be wrong here. Do we need to record + // that as well, or can we figure that out again when we report this + // breakpoint location. + if (!bp_loc_sp->SetPreferredLineEntry(sc.line_entry)) { + LLDB_LOG(log, "Tried to add a preferred line entry that didn't have the " + "same address as this location's address."); + } + } if (log && bp_loc_sp && !GetBreakpoint()->IsInternal()) { StreamString s; bp_loc_sp->GetDescription(&s, lldb::eDescriptionLevelVerbose); diff --git a/lldb/source/Breakpoint/BreakpointSite.cpp b/lldb/source/Breakpoint/BreakpointSite.cpp index 3ca93f908e30b8..9700a57d3346e0 100644 --- a/lldb/source/Breakpoint/BreakpointSite.cpp +++ b/lldb/source/Breakpoint/BreakpointSite.cpp @@ -87,6 +87,23 @@ void BreakpointSite::GetDescription(Stream *s, lldb::DescriptionLevel level) { m_constituents.GetDescription(s, level); } +std::optional BreakpointSite::GetSuggestedStackFrameIndex() { + + std::optional result; + std::lock_guard guard(m_constituents_mutex); + for (BreakpointLocationSP loc_sp : m_constituents.BreakpointLocations()) { + std::optional loc_frame_index = + loc_sp->GetSuggestedStackFrameIndex(); + if (loc_frame_index) { + if (result) + result = std::max(*loc_frame_index, *result); + else + result = loc_frame_index; + } + } + return result; +} + bool BreakpointSite::IsInternal() const { return m_constituents.IsInternal(); } uint8_t *BreakpointSite::GetTrapOpcodeBytes() { return &m_trap_opcode[0]; } diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp index f9786529bcdb1c..e4c6e374446e82 100644 --- a/lldb/source/Commands/CommandObjectType.cpp +++ b/lldb/source/Commands/CommandObjectType.cpp @@ -2649,6 +2649,8 @@ class CommandObjectTypeLookup : public CommandObjectRaw { return false; LanguageType lt1 = lang1->GetLanguageType(); LanguageType lt2 = lang2->GetLanguageType(); + if (lt1 == lt2) + return false; if (lt1 == guessed_language) return true; // make the selected frame's language come first if (lt2 == guessed_language) diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index 579a3999d14ea0..a485c4b9ba48a7 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -70,8 +70,9 @@ int Declaration::Compare(const Declaration &a, const Declaration &b) { return 0; } -bool Declaration::FileAndLineEqual(const Declaration &declaration) const { - int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, true); +bool Declaration::FileAndLineEqual(const Declaration &declaration, + bool full) const { + int file_compare = FileSpec::Compare(this->m_file, declaration.m_file, full); return file_compare == 0 && this->m_line == declaration.m_line; } diff --git a/lldb/source/Core/SourceManager.cpp b/lldb/source/Core/SourceManager.cpp index fd5b49946c6a92..27a9edeef4249e 100644 --- a/lldb/source/Core/SourceManager.cpp +++ b/lldb/source/Core/SourceManager.cpp @@ -430,7 +430,7 @@ SourceManager::GetDefaultFileAndLine() { false; // Force it to be a debug symbol. function_options.include_inlines = true; executable_ptr->FindFunctions(main_name, CompilerDeclContext(), - lldb::eFunctionNameTypeBase, + lldb::eFunctionNameTypeFull, function_options, sc_list); for (const SymbolContext &sc : sc_list) { if (sc.function) { diff --git a/lldb/source/Host/common/Editline.cpp b/lldb/source/Host/common/Editline.cpp index 60117cb5f0e615..f95f854c5f220c 100644 --- a/lldb/source/Host/common/Editline.cpp +++ b/lldb/source/Host/common/Editline.cpp @@ -10,9 +10,8 @@ #include #include -#include "lldb/Host/Editline.h" - #include "lldb/Host/ConnectionFileDescriptor.h" +#include "lldb/Host/Editline.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/Host.h" #include "lldb/Utility/CompletionRequest.h" @@ -23,6 +22,7 @@ #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StringList.h" #include "lldb/Utility/Timeout.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Locale.h" @@ -444,7 +444,9 @@ StringList Editline::GetInputAsStringList(int line_count) { if (line_count == 0) break; #if LLDB_EDITLINE_USE_WCHAR - lines.AppendString(m_utf8conv.to_bytes(line)); + std::string buffer; + llvm::convertWideToUTF8(line, buffer); + lines.AppendString(buffer); #else lines.AppendString(line); #endif @@ -636,7 +638,9 @@ unsigned char Editline::BreakLineCommand(int ch) { if (m_fix_indentation_callback) { StringList lines = GetInputAsStringList(m_current_line_index + 1); #if LLDB_EDITLINE_USE_WCHAR - lines.AppendString(m_utf8conv.to_bytes(new_line_fragment)); + std::string buffer; + llvm::convertWideToUTF8(new_line_fragment, buffer); + lines.AppendString(buffer); #else lines.AppendString(new_line_fragment); #endif @@ -684,8 +688,9 @@ unsigned char Editline::EndOrAddLineCommand(int ch) { m_input_lines.clear(); for (unsigned index = 0; index < lines.GetSize(); index++) { #if LLDB_EDITLINE_USE_WCHAR - m_input_lines.insert(m_input_lines.end(), - m_utf8conv.from_bytes(lines[index])); + std::wstring wbuffer; + llvm::ConvertUTF8toWide(lines[index], wbuffer); + m_input_lines.insert(m_input_lines.end(), wbuffer); #else m_input_lines.insert(m_input_lines.end(), lines[index]); #endif @@ -869,7 +874,9 @@ unsigned char Editline::FixIndentationCommand(int ch) { currentLine = currentLine.erase(0, -indent_correction); } #if LLDB_EDITLINE_USE_WCHAR - m_input_lines[m_current_line_index] = m_utf8conv.from_bytes(currentLine); + std::wstring wbuffer; + llvm::ConvertUTF8toWide(currentLine, wbuffer); + m_input_lines[m_current_line_index] = wbuffer; #else m_input_lines[m_current_line_index] = currentLine; #endif @@ -1502,7 +1509,7 @@ bool Editline::GetLine(std::string &line, bool &interrupted) { } else { m_history_sp->Enter(input); #if LLDB_EDITLINE_USE_WCHAR - line = m_utf8conv.to_bytes(SplitLines(input)[0]); + llvm::convertWideToUTF8(SplitLines(input)[0], line); #else line = SplitLines(input)[0]; #endif @@ -1574,25 +1581,22 @@ bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) { out = (unsigned char)ch; return true; #else - LLDB_DEPRECATED_WARNING_DISABLE - std::codecvt_utf8 cvt; - LLDB_DEPRECATED_WARNING_RESTORE llvm::SmallString<4> input; for (;;) { - const char *from_next; - wchar_t *to_next; - std::mbstate_t state = std::mbstate_t(); input.push_back(ch); - switch (cvt.in(state, input.begin(), input.end(), from_next, &out, &out + 1, - to_next)) { - case std::codecvt_base::ok: + auto *cur_ptr = reinterpret_cast(input.begin()); + auto *end_ptr = reinterpret_cast(input.end()); + llvm::UTF32 code_point = 0; + llvm::ConversionResult cr = llvm::convertUTF8Sequence( + &cur_ptr, end_ptr, &code_point, llvm::lenientConversion); + switch (cr) { + case llvm::conversionOK: + out = code_point; return out != (EditLineGetCharType)WEOF; - - case std::codecvt_base::error: - case std::codecvt_base::noconv: + case llvm::targetExhausted: + case llvm::sourceIllegal: return false; - - case std::codecvt_base::partial: + case llvm::sourceExhausted: lldb::ConnectionStatus status; size_t read_count = m_input_connection.Read( &ch, 1, std::chrono::seconds(0), status, nullptr); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp index dee90804c52584..c18edd10b96819 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.cpp @@ -151,3 +151,19 @@ bool DWARFIndex::ProcessTypeDIEMatchQuery( return true; return callback(die); } + +void DWARFIndex::GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) { + GetNamespaces(name, [&](DWARFDIE die) { + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, callback); + }); +} + +bool DWARFIndex::ProcessNamespaceDieMatchParents( + const CompilerDeclContext &parent_decl_ctx, DWARFDIE die, + llvm::function_ref callback) { + if (!SymbolFileDWARF::DIEInDeclContext(parent_decl_ctx, die)) + return true; + return callback(die); +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h index fea3a4fd697389..ac1f75e91c2195 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFIndex.h @@ -71,6 +71,14 @@ class DWARFIndex { virtual void GetTypesWithQuery(TypeQuery &query, llvm::function_ref callback); + /// Get namespace DIEs whose base name match \param name with \param + /// parent_decl_ctx in its decl parent chain. A base implementation + /// is provided. Specializations should override this if they are able to + /// provide a faster implementation. + virtual void + GetNamespacesWithParents(ConstString name, + const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback); virtual void GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, @@ -127,6 +135,9 @@ class DWARFIndex { bool ProcessTypeDIEMatchQuery(TypeQuery &query, DWARFDIE die, llvm::function_ref callback); + bool ProcessNamespaceDieMatchParents( + const CompilerDeclContext &parent_decl_ctx, DWARFDIE die, + llvm::function_ref callback); }; } // namespace dwarf } // namespace lldb_private::plugin diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp index c809e5ff7f8535..6f2cb455ec00e1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.cpp @@ -368,9 +368,10 @@ void DebugNamesDWARFIndex::GetFullyQualifiedType( continue; } - if (SameParentChain(parent_names, *parent_chain) && - !ProcessEntry(entry, callback)) - return; + if (SameParentChain(parent_names, *parent_chain)) { + if (!ProcessEntry(entry, callback)) + return; + } } m_fallback.GetFullyQualifiedType(context, callback); } @@ -554,17 +555,60 @@ void DebugNamesDWARFIndex::GetTypesWithQuery( continue; } - if (WithinParentChain(parent_contexts, *parent_chain) && - !ProcessEntry(entry, [&](DWARFDIE die) { - // After .debug_names filtering still sending to base class for - // further filtering before calling the callback. - return ProcessTypeDIEMatchQuery(query, die, callback); - })) - return; + if (WithinParentChain(parent_contexts, *parent_chain)) { + if (!ProcessEntry(entry, [&](DWARFDIE die) { + // After .debug_names filtering still sending to base class for + // further filtering before calling the callback. + return ProcessTypeDIEMatchQuery(query, die, callback); + })) + // If the callback returns false, we're done. + return; + } } m_fallback.GetTypesWithQuery(query, callback); } +void DebugNamesDWARFIndex::GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) { + std::vector parent_contexts = + parent_decl_ctx.GetCompilerContext(); + llvm::SmallVector parent_named_contexts; + std::copy_if(parent_contexts.rbegin(), parent_contexts.rend(), + std::back_inserter(parent_named_contexts), + [](const CompilerContext &ctx) { return !ctx.name.IsEmpty(); }); + for (const DebugNames::Entry &entry : + m_debug_names_up->equal_range(name.GetStringRef())) { + lldb_private::dwarf::Tag entry_tag = entry.tag(); + if (entry_tag == DW_TAG_namespace || + entry_tag == DW_TAG_imported_declaration) { + std::optional> parent_chain = + getParentChain(entry); + if (!parent_chain) { + // Fallback: use the base class implementation. + if (!ProcessEntry(entry, [&](DWARFDIE die) { + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, + callback); + })) + return; + continue; + } + + if (WithinParentChain(parent_named_contexts, *parent_chain)) { + if (!ProcessEntry(entry, [&](DWARFDIE die) { + // After .debug_names filtering still sending to base class for + // further filtering before calling the callback. + return ProcessNamespaceDieMatchParents(parent_decl_ctx, die, + callback); + })) + // If the callback returns false, we're done. + return; + } + } + } + m_fallback.GetNamespacesWithParents(name, parent_decl_ctx, callback); +} + void DebugNamesDWARFIndex::GetFunctions( const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h index 074f68a8c55963..ab6cde12623f6a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DebugNamesDWARFIndex.h @@ -55,7 +55,9 @@ class DebugNamesDWARFIndex : public DWARFIndex { void GetTypesWithQuery(TypeQuery &query, llvm::function_ref callback) override; - + void GetNamespacesWithParents( + ConstString name, const CompilerDeclContext &parent_decl_ctx, + llvm::function_ref callback) override; void GetFunctions(const Module::LookupInfo &lookup_info, SymbolFileDWARF &dwarf, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index e5b8eee8d08c24..f23f8cc3d781d0 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2900,7 +2900,7 @@ SymbolFileDWARF::FindNamespace(ConstString name, if (!DeclContextMatchesThisSymbolFile(parent_decl_ctx)) return namespace_decl_ctx; - m_index->GetNamespaces(name, [&](DWARFDIE die) { + m_index->GetNamespacesWithParents(name, parent_decl_ctx, [&](DWARFDIE die) { if (!DIEInDeclContext(parent_decl_ctx, die, only_root_namespaces)) return true; // The containing decl contexts don't match diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index f7d9c0d2d33065..5c7772a6db780d 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -230,7 +230,7 @@ Block *Block::GetContainingInlinedBlockWithCallSite( const auto *function_info = inlined_block->GetInlinedFunctionInfo(); if (function_info && - function_info->GetCallSite().FileAndLineEqual(find_call_site)) + function_info->GetCallSite().FileAndLineEqual(find_call_site, true)) return inlined_block; inlined_block = inlined_block->GetInlinedParent(); } diff --git a/lldb/source/Symbol/CompileUnit.cpp b/lldb/source/Symbol/CompileUnit.cpp index db8f8ce6bcbc92..73389b2e8479b3 100644 --- a/lldb/source/Symbol/CompileUnit.cpp +++ b/lldb/source/Symbol/CompileUnit.cpp @@ -251,7 +251,10 @@ void CompileUnit::ResolveSymbolContext( SymbolContextItem resolve_scope, SymbolContextList &sc_list, RealpathPrefixes *realpath_prefixes) { const FileSpec file_spec = src_location_spec.GetFileSpec(); - const uint32_t line = src_location_spec.GetLine().value_or(0); + const uint32_t line = + src_location_spec.GetLine().value_or(LLDB_INVALID_LINE_NUMBER); + const uint32_t column_num = + src_location_spec.GetColumn().value_or(LLDB_INVALID_COLUMN_NUMBER); const bool check_inlines = src_location_spec.GetCheckInlines(); // First find all of the file indexes that match our "file_spec". If @@ -268,7 +271,7 @@ void CompileUnit::ResolveSymbolContext( SymbolContext sc(GetModule()); sc.comp_unit = this; - if (line == 0) { + if (line == LLDB_INVALID_LINE_NUMBER) { if (file_spec_matches_cu_file_spec && !check_inlines) { // only append the context if we aren't looking for inline call sites by // file and line and if the file spec matches that of the compile unit @@ -312,6 +315,112 @@ void CompileUnit::ResolveSymbolContext( 0, file_indexes, src_location_spec, &line_entry); } + // If we didn't manage to find a breakpoint that matched the line number + // requested, that might be because it is only an inline call site, and + // doesn't have a line entry in the line table. Scan for that here. + // + // We are making the assumption that if there was an inlined function it will + // contribute at least 1 non-call-site entry to the line table. That's handy + // because we don't move line breakpoints over function boundaries, so if we + // found a hit, and there were also a call site entry, it would have to be in + // the function containing the PC of the line table match. That way we can + // limit the call site search to that function. + // We will miss functions that ONLY exist as a call site entry. + + if (line_entry.IsValid() && + (line_entry.line != line || line_entry.column != column_num) && + resolve_scope & eSymbolContextLineEntry && check_inlines) { + // We don't move lines over function boundaries, so the address in the + // line entry will be the in function that contained the line that might + // be a CallSite, and we can just iterate over that function to find any + // inline records, and dig up their call sites. + Address start_addr = line_entry.range.GetBaseAddress(); + Function *function = start_addr.CalculateSymbolContextFunction(); + + Declaration sought_decl(file_spec, line, column_num); + // We use this recursive function to descend the block structure looking + // for a block that has this Declaration as in it's CallSite info. + // This function recursively scans the sibling blocks of the incoming + // block parameter. + std::function examine_block = + [&sought_decl, &sc_list, &src_location_spec, resolve_scope, + &examine_block](Block &block) -> void { + // Iterate over the sibling child blocks of the incoming block. + Block *sibling_block = block.GetFirstChild(); + while (sibling_block) { + // We only have to descend through the regular blocks, looking for + // immediate inlines, since those are the only ones that will have this + // callsite. + const InlineFunctionInfo *inline_info = + sibling_block->GetInlinedFunctionInfo(); + if (inline_info) { + // If this is the call-site we are looking for, record that: + // We need to be careful because the call site from the debug info + // will generally have a column, but the user might not have specified + // it. + Declaration found_decl = inline_info->GetCallSite(); + uint32_t sought_column = sought_decl.GetColumn(); + if (found_decl.FileAndLineEqual(sought_decl, false) && + (sought_column == LLDB_INVALID_COLUMN_NUMBER || + sought_column == found_decl.GetColumn())) { + // If we found a call site, it belongs not in this inlined block, + // but in the parent block that inlined it. + Address parent_start_addr; + if (sibling_block->GetParent()->GetStartAddress( + parent_start_addr)) { + SymbolContext sc; + parent_start_addr.CalculateSymbolContext(&sc, resolve_scope); + // Now swap out the line entry for the one we found. + LineEntry call_site_line = sc.line_entry; + call_site_line.line = found_decl.GetLine(); + call_site_line.column = found_decl.GetColumn(); + bool matches_spec = true; + // If the user asked for an exact match, we need to make sure the + // call site we found actually matches the location. + if (src_location_spec.GetExactMatch()) { + matches_spec = false; + if ((src_location_spec.GetFileSpec() == + sc.line_entry.GetFile()) && + (src_location_spec.GetLine() && + *src_location_spec.GetLine() == call_site_line.line) && + (src_location_spec.GetColumn() && + *src_location_spec.GetColumn() == call_site_line.column)) + matches_spec = true; + } + if (matches_spec && + sibling_block->GetRangeAtIndex(0, call_site_line.range)) { + SymbolContext call_site_sc(sc.target_sp, sc.module_sp, + sc.comp_unit, sc.function, sc.block, + &call_site_line, sc.symbol); + sc_list.Append(call_site_sc); + } + } + } + } + + // Descend into the child blocks: + examine_block(*sibling_block); + // Now go to the next sibling: + sibling_block = sibling_block->GetSibling(); + } + }; + + if (function) { + // We don't need to examine the function block, it can't be inlined. + Block &func_block = function->GetBlock(true); + examine_block(func_block); + } + // If we found entries here, we are done. We only get here because we + // didn't find an exact line entry for this line & column, but if we found + // an exact match from the call site info that's strictly better than + // continuing to look for matches further on in the file. + // FIXME: Should I also do this for "call site line exists between the + // given line number and the later line we found in the line table"? That's + // a closer approximation to our general sliding algorithm. + if (sc_list.GetSize()) + return; + } + // If "exact == true", then "found_line" will be the same as "line". If // "exact == false", the "found_line" will be the closest line entry // with a line number greater than "line" and we will use this for our diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index 3849ec5ed178d9..94a381edd5e202 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -85,121 +85,32 @@ void StackFrameList::ResetCurrentInlinedDepth() { return; std::lock_guard guard(m_mutex); - - GetFramesUpTo(0, DoNotAllowInterruption); - if (m_frames.empty()) - return; - if (!m_frames[0]->IsInlined()) { - m_current_inlined_depth = UINT32_MAX; - m_current_inlined_pc = LLDB_INVALID_ADDRESS; - Log *log = GetLog(LLDBLog::Step); - if (log && log->GetVerbose()) - LLDB_LOGF( - log, - "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); - return; - } - // We only need to do something special about inlined blocks when we are - // at the beginning of an inlined function: - // FIXME: We probably also have to do something special if the PC is at - // the END of an inlined function, which coincides with the end of either - // its containing function or another inlined function. - - Block *block_ptr = m_frames[0]->GetFrameBlock(); - if (!block_ptr) - return; + m_current_inlined_pc = LLDB_INVALID_ADDRESS; + m_current_inlined_depth = UINT32_MAX; - Address pc_as_address; - lldb::addr_t curr_pc = m_thread.GetRegisterContext()->GetPC(); - pc_as_address.SetLoadAddress(curr_pc, &(m_thread.GetProcess()->GetTarget())); - AddressRange containing_range; - if (!block_ptr->GetRangeContainingAddress(pc_as_address, containing_range) || - pc_as_address != containing_range.GetBaseAddress()) - return; - - // If we got here because of a breakpoint hit, then set the inlined depth - // depending on where the breakpoint was set. If we got here because of a - // crash, then set the inlined depth to the deepest most block. Otherwise, - // we stopped here naturally as the result of a step, so set ourselves in the - // containing frame of the whole set of nested inlines, so the user can then - // "virtually" step into the frames one by one, or next over the whole mess. - // Note: We don't have to handle being somewhere in the middle of the stack - // here, since ResetCurrentInlinedDepth doesn't get called if there is a - // valid inlined depth set. StopInfoSP stop_info_sp = m_thread.GetStopInfo(); if (!stop_info_sp) return; - switch (stop_info_sp->GetStopReason()) { - case eStopReasonWatchpoint: - case eStopReasonException: - case eStopReasonExec: - case eStopReasonFork: - case eStopReasonVFork: - case eStopReasonVForkDone: - case eStopReasonSignal: - // In all these cases we want to stop in the deepest frame. - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - case eStopReasonBreakpoint: { - // FIXME: Figure out what this break point is doing, and set the inline - // depth appropriately. Be careful to take into account breakpoints that - // implement step over prologue, since that should do the default - // calculation. For now, if the breakpoints corresponding to this hit are - // all internal, I set the stop location to the top of the inlined stack, - // since that will make things like stepping over prologues work right. - // But if there are any non-internal breakpoints I do to the bottom of the - // stack, since that was the old behavior. - uint32_t bp_site_id = stop_info_sp->GetValue(); - BreakpointSiteSP bp_site_sp( - m_thread.GetProcess()->GetBreakpointSiteList().FindByID(bp_site_id)); - bool all_internal = true; - if (bp_site_sp) { - uint32_t num_owners = bp_site_sp->GetNumberOfConstituents(); - for (uint32_t i = 0; i < num_owners; i++) { - Breakpoint &bp_ref = - bp_site_sp->GetConstituentAtIndex(i)->GetBreakpoint(); - if (!bp_ref.IsInternal()) { - all_internal = false; - } - } - } - if (!all_internal) { - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = 0; - break; - } - } - [[fallthrough]]; - default: { - // Otherwise, we should set ourselves at the container of the inlining, so - // that the user can descend into them. So first we check whether we have - // more than one inlined block sharing this PC: - int num_inlined_functions = 0; - - for (Block *container_ptr = block_ptr->GetInlinedParent(); - container_ptr != nullptr; - container_ptr = container_ptr->GetInlinedParent()) { - if (!container_ptr->GetRangeContainingAddress(pc_as_address, - containing_range)) - break; - if (pc_as_address != containing_range.GetBaseAddress()) - break; - num_inlined_functions++; - } - m_current_inlined_pc = curr_pc; - m_current_inlined_depth = num_inlined_functions + 1; - Log *log = GetLog(LLDBLog::Step); + bool inlined = true; + auto inline_depth = stop_info_sp->GetSuggestedStackFrameIndex(inlined); + // We're only adjusting the inlined stack here. + Log *log = GetLog(LLDBLog::Step); + if (inline_depth) { + m_current_inlined_depth = *inline_depth; + m_current_inlined_pc = m_thread.GetRegisterContext()->GetPC(); + if (log && log->GetVerbose()) LLDB_LOGF(log, "ResetCurrentInlinedDepth: setting inlined " "depth: %d 0x%" PRIx64 ".\n", - m_current_inlined_depth, curr_pc); - - break; - } + m_current_inlined_depth, m_current_inlined_pc); + } else { + if (log && log->GetVerbose()) + LLDB_LOGF( + log, + "ResetCurrentInlinedDepth: Invalidating current inlined depth.\n"); } } @@ -816,19 +727,48 @@ void StackFrameList::SelectMostRelevantFrame() { RecognizedStackFrameSP recognized_frame_sp = frame_sp->GetRecognizedFrame(); - if (!recognized_frame_sp) { - LLDB_LOG(log, "Frame #0 not recognized"); - return; + if (recognized_frame_sp) { + if (StackFrameSP most_relevant_frame_sp = + recognized_frame_sp->GetMostRelevantFrame()) { + LLDB_LOG(log, "Found most relevant frame at index {0}", + most_relevant_frame_sp->GetFrameIndex()); + SetSelectedFrame(most_relevant_frame_sp.get()); + return; + } } + LLDB_LOG(log, "Frame #0 not recognized"); - if (StackFrameSP most_relevant_frame_sp = - recognized_frame_sp->GetMostRelevantFrame()) { - LLDB_LOG(log, "Found most relevant frame at index {0}", - most_relevant_frame_sp->GetFrameIndex()); - SetSelectedFrame(most_relevant_frame_sp.get()); - } else { - LLDB_LOG(log, "No relevant frame!"); + // If this thread has a non-trivial StopInof, then let it suggest + // a most relevant frame: + StopInfoSP stop_info_sp = m_thread.GetStopInfo(); + uint32_t stack_idx = 0; + bool found_relevant = false; + if (stop_info_sp) { + // Here we're only asking the stop info if it wants to adjust the real stack + // index. We have to ask about the m_inlined_stack_depth in + // Thread::ShouldStop since the plans need to reason with that info. + bool inlined = false; + std::optional stack_opt = + stop_info_sp->GetSuggestedStackFrameIndex(inlined); + if (stack_opt) { + stack_idx = *stack_opt; + found_relevant = true; + } } + + frame_sp = GetFrameAtIndex(stack_idx); + if (!frame_sp) + LLDB_LOG(log, "Stop info suggested relevant frame {0} but it didn't exist", + stack_idx); + else if (found_relevant) + LLDB_LOG(log, "Setting selected frame from stop info to {0}", stack_idx); + // Note, we don't have to worry about "inlined" frames here, because we've + // already calculated the inlined frame in Thread::ShouldStop, and + // SetSelectedFrame will take care of that adjustment for us. + SetSelectedFrame(frame_sp.get()); + + if (!found_relevant) + LLDB_LOG(log, "No relevant frame!"); } uint32_t StackFrameList::GetSelectedFrameIndex( @@ -841,6 +781,7 @@ uint32_t StackFrameList::GetSelectedFrameIndex( // isn't set, then don't force a selection here, just return 0. if (!select_most_relevant) return 0; + // If the inlined stack frame is set, then use that: m_selected_frame_idx = 0; } return *m_selected_frame_idx; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 60aa65ed38c749..f6387d47504e62 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -15,6 +15,7 @@ #include "lldb/Breakpoint/WatchpointResource.h" #include "lldb/Core/Debugger.h" #include "lldb/Expression/UserExpression.h" +#include "lldb/Symbol/Block.h" #include "lldb/Target/Process.h" #include "lldb/Target/StopInfo.h" #include "lldb/Target/Target.h" @@ -246,6 +247,22 @@ class StopInfoBreakpoint : public StopInfo { return m_description.c_str(); } + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + if (!inlined_stack) + return {}; + + ThreadSP thread_sp(m_thread_wp.lock()); + if (!thread_sp) + return {}; + BreakpointSiteSP bp_site_sp( + thread_sp->GetProcess()->GetBreakpointSiteList().FindByID(m_value)); + if (!bp_site_sp) + return {}; + + return bp_site_sp->GetSuggestedStackFrameIndex(); + } + protected: bool ShouldStop(Event *event_ptr) override { // This just reports the work done by PerformAction or the synchronous @@ -1164,6 +1181,44 @@ class StopInfoTrace : public StopInfo { else return m_description.c_str(); } + + std::optional + GetSuggestedStackFrameIndex(bool inlined_stack) override { + // Trace only knows how to adjust inlined stacks: + if (!inlined_stack) + return {}; + + ThreadSP thread_sp = GetThread(); + StackFrameSP frame_0_sp = thread_sp->GetStackFrameAtIndex(0); + if (!frame_0_sp) + return {}; + if (!frame_0_sp->IsInlined()) + return {}; + Block *block_ptr = frame_0_sp->GetFrameBlock(); + if (!block_ptr) + return {}; + Address pc_address = frame_0_sp->GetFrameCodeAddress(); + AddressRange containing_range; + if (!block_ptr->GetRangeContainingAddress(pc_address, containing_range) || + pc_address != containing_range.GetBaseAddress()) + return {}; + + int num_inlined_functions = 0; + + for (Block *container_ptr = block_ptr->GetInlinedParent(); + container_ptr != nullptr; + container_ptr = container_ptr->GetInlinedParent()) { + if (!container_ptr->GetRangeContainingAddress(pc_address, + containing_range)) + break; + if (pc_address != containing_range.GetBaseAddress()) + break; + + num_inlined_functions++; + } + inlined_stack = true; + return num_inlined_functions + 1; + } }; // StopInfoException diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index 8373cdc36268f8..735295e6f25937 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -619,6 +619,14 @@ void Thread::WillStop() { void Thread::SetupForResume() { if (GetResumeState() != eStateSuspended) { + // First check whether this thread is going to "actually" resume at all. + // For instance, if we're stepping from one level to the next of an + // virtual inlined call stack, we just change the inlined call stack index + // without actually running this thread. In that case, for this thread we + // shouldn't push a step over breakpoint plan or do that work. + if (GetCurrentPlan()->IsVirtualStep()) + return; + // If we're at a breakpoint push the step-over breakpoint plan. Do this // before telling the current plan it will resume, since we might change // what the current plan is. diff --git a/lldb/source/Target/ThreadPlanStepInRange.cpp b/lldb/source/Target/ThreadPlanStepInRange.cpp index 567dcc26d0d372..325a70619908b6 100644 --- a/lldb/source/Target/ThreadPlanStepInRange.cpp +++ b/lldb/source/Target/ThreadPlanStepInRange.cpp @@ -41,7 +41,7 @@ ThreadPlanStepInRange::ThreadPlanStepInRange( "Step Range stepping in", thread, range, addr_context, stop_others), ThreadPlanShouldStopHere(this), m_step_past_prologue(true), - m_virtual_step(false), m_step_into_target(step_into_target) { + m_virtual_step(eLazyBoolCalculate), m_step_into_target(step_into_target) { SetCallbacks(); SetFlagsToDefault(); SetupAvoidNoDebug(step_in_avoids_code_without_debug_info, @@ -149,7 +149,7 @@ bool ThreadPlanStepInRange::ShouldStop(Event *event_ptr) { m_sub_plan_sp.reset(); } - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { // If we've just completed a virtual step, all we need to do is check for a // ShouldStopHere plan, and otherwise we're done. // FIXME - This can be both a step in and a step out. Probably should @@ -431,7 +431,7 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool return_value = false; - if (m_virtual_step) { + if (m_virtual_step == eLazyBoolYes) { return_value = true; } else { StopInfoSP stop_info_sp = GetPrivateStopInfo(); @@ -460,10 +460,13 @@ bool ThreadPlanStepInRange::DoPlanExplainsStop(Event *event_ptr) { bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, bool current_plan) { - m_virtual_step = false; + m_virtual_step = eLazyBoolCalculate; if (resume_state == eStateStepping && current_plan) { Thread &thread = GetThread(); // See if we are about to step over a virtual inlined call. + // But if we already know we're virtual stepping, don't decrement the + // inlined depth again... + bool step_without_resume = thread.DecrementCurrentInlinedDepth(); if (step_without_resume) { Log *log = GetLog(LLDBLog::Step); @@ -476,11 +479,20 @@ bool ThreadPlanStepInRange::DoWillResume(lldb::StateType resume_state, // FIXME: Maybe it would be better to create a InlineStep stop reason, but // then // the whole rest of the world would have to handle that stop reason. - m_virtual_step = true; + m_virtual_step = eLazyBoolYes; } return !step_without_resume; } return true; } -bool ThreadPlanStepInRange::IsVirtualStep() { return m_virtual_step; } +bool ThreadPlanStepInRange::IsVirtualStep() { + if (m_virtual_step == eLazyBoolCalculate) { + Thread &thread = GetThread(); + if (thread.GetCurrentInlinedDepth() == UINT32_MAX) + m_virtual_step = eLazyBoolNo; + else + m_virtual_step = eLazyBoolYes; + } + return m_virtual_step == eLazyBoolYes; +} diff --git a/lldb/source/Target/ThreadPlanStepOverRange.cpp b/lldb/source/Target/ThreadPlanStepOverRange.cpp index ef5b4b5c434d16..643ee827c865cb 100644 --- a/lldb/source/Target/ThreadPlanStepOverRange.cpp +++ b/lldb/source/Target/ThreadPlanStepOverRange.cpp @@ -402,7 +402,7 @@ bool ThreadPlanStepOverRange::DoWillResume(lldb::StateType resume_state, if (in_inlined_stack) { Log *log = GetLog(LLDBLog::Step); LLDB_LOGF(log, - "ThreadPlanStepInRange::DoWillResume: adjusting range to " + "ThreadPlanStepOverRange::DoWillResume: adjusting range to " "the frame at inlined depth %d.", thread.GetCurrentInlinedDepth()); StackFrameSP stack_sp = thread.GetStackFrameAtIndex(0); diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py index 0919eb3c5dd812..93d5392830b508 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx/initializerlist/TestInitializerList.py @@ -40,5 +40,3 @@ def test(self): "frame variable ils", substrs=['[4] = "surprise it is a long string!! yay!!"'], ) - - self.expect("image list", substrs=self.getLibcPlusPlusLibs()) diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py index 5eb3fc3cada921..08ac9290ee85ac 100644 --- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py +++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py @@ -132,12 +132,39 @@ def test_read_registers_using_g_packets(self): target = self.createTarget("a.yaml") process = self.connect(target) - self.assertEqual(1, self.server.responder.packetLog.count("g")) - self.server.responder.packetLog = [] + # We want to make sure that the process is using the g packet, but it's + # not required the "connect" should read all registers. However, it might + # have... So we need to wait till we explicitly 'read_registers' to do + # test. + # Also, even with the use-g-packet-for-reading lldb will sometimes send p0 + # early on to see if the packet is supported. So we can't say that there + # will be NO p packets. + # But there certainly should be no p packets after the g packet. + self.read_registers(process) - # Reading registers should not cause any 'p' packets to be exchanged. + print(f"\nPACKET LOG:\n{self.server.responder.packetLog}\n") + g_pos = 0 + try: + g_pos = self.server.responder.packetLog.index("g") + except err: + self.fail("'g' packet not found after fetching registers") + + try: + second_g = self.server.responder.packetLog.index("g", g_pos) + self.fail("Found more than one 'g' packet") + except: + pass + + # Make sure there aren't any `p` packets after the `g` packet: self.assertEqual( - 0, len([p for p in self.server.responder.packetLog if p.startswith("p")]) + 0, + len( + [ + p + for p in self.server.responder.packetLog[g_pos:] + if p.startswith("p") + ] + ), ) def test_read_registers_using_p_packets(self): diff --git a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py index 752c3a9cbd286a..f52e0f0fd5bcfe 100644 --- a/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py +++ b/lldb/test/API/functionalities/inline-stepping/TestInlineStepping.py @@ -32,6 +32,12 @@ def test_step_in_template_with_python_api(self): self.build() self.step_in_template() + @add_test_categories(["pyapi"]) + def test_virtual_inline_stepping(self): + """Test stepping through a virtual inlined call stack""" + self.build() + self.virtual_inline_stepping() + def setUp(self): # Call super's setUp(). TestBase.setUp(self) @@ -357,3 +363,60 @@ def step_in_template(self): step_sequence = [["// In max_value specialized", "into"]] self.run_step_sequence(step_sequence) + + def run_to_call_site_and_step(self, source_regex, func_name, start_pos): + main_spec = lldb.SBFileSpec("calling.cpp") + # Set the breakpoint by file and line, not sourced regex because + # we want to make sure we can set breakpoints on call sites: + call_site_line_num = line_number(self.main_source, source_regex) + target, process, thread, bkpt = lldbutil.run_to_line_breakpoint( + self, main_spec, call_site_line_num + ) + + # Make sure that the location is at the call site (run_to_line_breakpoint already asserted + # that there's one location.): + bkpt_loc = bkpt.location[0] + strm = lldb.SBStream() + result = bkpt_loc.GetDescription(strm, lldb.eDescriptionLevelFull) + + self.assertTrue(result, "Got a location description") + desc = strm.GetData() + self.assertIn(f"calling.cpp:{call_site_line_num}", desc, "Right line listed") + # We don't get the function name right yet - so we omit it in printing. + # Turn on this test when that is working. + # self.assertIn(func_name, desc, "Right function listed") + + pc = thread.frame[0].pc + for i in range(start_pos, 3): + thread.StepInto() + frame_0 = thread.frame[0] + + trivial_line_num = line_number( + self.main_source, f"In caller_trivial_inline_{i}." + ) + self.assertEqual( + frame_0.line_entry.line, + trivial_line_num, + f"Stepped into the caller_trivial_inline_{i}", + ) + if pc != frame_0.pc: + # If we get here, we stepped to the expected line number, but + # the compiler on this system has decided to insert an instruction + # between the call site of an inlined function with no arguments, + # returning void, and its immediate call to another void inlined function + # with no arguments. We aren't going to be testing virtual inline + # stepping for this function... + break + + process.Kill() + target.Clear() + + def virtual_inline_stepping(self): + """Use the Python API's to step through a virtual inlined stack""" + self.run_to_call_site_and_step("At caller_trivial_inline_1", "main", 1) + self.run_to_call_site_and_step( + "In caller_trivial_inline_1", "caller_trivial_inline_1", 2 + ) + self.run_to_call_site_and_step( + "In caller_trivial_inline_2", "caller_trivial_inline_2", 3 + ) diff --git a/lldb/test/API/functionalities/inline-stepping/calling.cpp b/lldb/test/API/functionalities/inline-stepping/calling.cpp index 49179ce7c97883..d7ee56b3c07909 100644 --- a/lldb/test/API/functionalities/inline-stepping/calling.cpp +++ b/lldb/test/API/functionalities/inline-stepping/calling.cpp @@ -13,6 +13,12 @@ int called_by_inline_ref (int &value); inline void inline_trivial_1 () __attribute__((always_inline)); inline void inline_trivial_2 () __attribute__((always_inline)); +// These three should share the same initial pc so we can test +// virtual inline stepping. +inline void caller_trivial_inline_1() __attribute__((always_inline)); +inline void caller_trivial_inline_2() __attribute__((always_inline)); +inline void caller_trivial_inline_3() __attribute__((always_inline)); + void caller_trivial_1 (); void caller_trivial_2 (); @@ -79,6 +85,23 @@ caller_trivial_2 () inline_value += 1; // At increment in caller_trivial_2. } +// When you call caller_trivial_inline_1, the inlined call-site +// should share a PC with all three of the following inlined +// functions, so we can exercise "virtual inline stepping". +void caller_trivial_inline_1() { + caller_trivial_inline_2(); // In caller_trivial_inline_1. + inline_value += 1; +} + +void caller_trivial_inline_2() { + caller_trivial_inline_3(); // In caller_trivial_inline_2. + inline_value += 1; +} + +void caller_trivial_inline_3() { + inline_value += 1; // In caller_trivial_inline_3. +} + void called_by_inline_trivial () { @@ -132,5 +155,7 @@ main (int argc, char **argv) max_value(123, 456); // Call max_value template max_value(std::string("abc"), std::string("0022")); // Call max_value specialized + caller_trivial_inline_1(); // At caller_trivial_inline_1. + return 0; // About to return from main. } diff --git a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py index b5e8115160d209..41141164769ec2 100644 --- a/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py +++ b/lldb/test/API/lang/cpp/namespace/TestNamespaceLookup.py @@ -8,7 +8,7 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - +from lldbsuite.test import lldbplatformutil class NamespaceLookupTestCase(TestBase): def setUp(self): @@ -167,7 +167,10 @@ def test_scope_lookup_with_run_command(self): self.runToBkpt("continue") # FIXME: In DWARF 5 with dsyms, the ordering of functions is slightly # different, which also hits the same issues mentioned previously. - if configuration.dwarf_version <= 4 or self.getDebugInfo() == "dwarf": + if ( + int(lldbplatformutil.getDwarfVersion()) <= 4 + or self.getDebugInfo() == "dwarf" + ): self.expect_expr("func()", result_type="int", result_value="2") # Continue to BP_ns_scope at ns scope diff --git a/lldb/test/API/python_api/process/io/TestProcessIO.py b/lldb/test/API/python_api/process/io/TestProcessIO.py index 3b5c7c48c51f4d..5d9727add399b5 100644 --- a/lldb/test/API/python_api/process/io/TestProcessIO.py +++ b/lldb/test/API/python_api/process/io/TestProcessIO.py @@ -99,31 +99,38 @@ def test_stdout_stderr_redirection(self): @expectedFlakeyLinux(bugnumber="llvm.org/pr26437") @skipIfDarwinEmbedded # debugserver can't create/write files on the device def test_stdout_stderr_redirection_to_existing_files(self): - """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR without redirecting STDIN to output files already exist.""" + """Exercise SBLaunchInfo::AddOpenFileAction() for STDOUT and STDERR redirect to output files already exist.""" self.setup_test() self.build() self.create_target() - self.write_file_with_placeholder(self.output_file) - self.write_file_with_placeholder(self.error_file) - self.redirect_stdout() - self.redirect_stderr() - self.run_process(True) - output = self.read_output_file_and_delete() - error = self.read_error_file_and_delete() - self.check_process_output(output, error) - def write_file_with_placeholder(self, target_file): + # Create the output and error files with placeholder placeholder = "This content should be overwritten." + # Local file directory and working directory are the same for local debugging + f = open(self.local_output_file, "w") + f.write(placeholder) + f.close() + f = open(self.local_error_file, "w") + f.write(placeholder) + f.close() if lldb.remote_platform: self.runCmd( - 'platform file write "{target}" -d "{data}"'.format( - target=target_file, data=placeholder + 'platform put-file "{local}" "{remote}"'.format( + local=self.local_output_file, remote=self.output_file + ) + ) + self.runCmd( + 'platform put-file "{local}" "{remote}"'.format( + local=self.local_error_file, remote=self.error_file ) ) - else: - f = open(target_file, "w") - f.write(placeholder) - f.close() + + self.redirect_stdout() + self.redirect_stderr() + self.run_process(True) + output = self.read_output_file_and_delete() + error = self.read_error_file_and_delete() + self.check_process_output(output, error) # target_file - path on local file system or remote file system if running remote # local_file - path on local system diff --git a/lldb/test/API/python_api/type/TestTypeList.py b/lldb/test/API/python_api/type/TestTypeList.py index bc4d00c17c5551..09879276b44aa3 100644 --- a/lldb/test/API/python_api/type/TestTypeList.py +++ b/lldb/test/API/python_api/type/TestTypeList.py @@ -6,7 +6,7 @@ from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil - +from lldbsuite.test import lldbplatformutil class TypeAndTypeListTestCase(TestBase): def setUp(self): @@ -248,7 +248,7 @@ def test(self): self.assertEqual(myint_arr_element_type, myint_type) # Test enum methods. Requires DW_AT_enum_class which was added in Dwarf 4. - if configuration.dwarf_version >= 4: + if int(lldbplatformutil.getDwarfVersion()) >= 4: enum_type = target.FindFirstType("EnumType") self.assertTrue(enum_type) self.DebugSBType(enum_type) diff --git a/lldb/tools/lldb-dap/ProgressEvent.cpp b/lldb/tools/lldb-dap/ProgressEvent.cpp index 8a660b50af1205..0dcc2ee81001d5 100644 --- a/lldb/tools/lldb-dap/ProgressEvent.cpp +++ b/lldb/tools/lldb-dap/ProgressEvent.cpp @@ -110,7 +110,6 @@ json::Value ProgressEvent::ToJSON() const { std::string progress_id_str; llvm::raw_string_ostream progress_id_strm(progress_id_str); progress_id_strm << m_progress_id; - progress_id_strm.flush(); body.try_emplace("progressId", progress_id_str); if (m_event_type == progressStart) { diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index f9e0605fce29d6..fdc9bfae1876c5 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -181,6 +181,9 @@ TEST(DWARFExpression, DW_OP_bra) { }), // clang-format on llvm::HasValue(0x42)); + + EXPECT_THAT_ERROR(Evaluate({DW_OP_bra, 0x01, 0x00}).takeError(), + llvm::Failed()); } TEST(DWARFExpression, DW_OP_convert) { diff --git a/lldb/unittests/Host/FileActionTest.cpp b/lldb/unittests/Host/FileActionTest.cpp index 3d2c722552c9c2..56227cd587e5bb 100644 --- a/lldb/unittests/Host/FileActionTest.cpp +++ b/lldb/unittests/Host/FileActionTest.cpp @@ -10,6 +10,9 @@ #include "lldb/Host/FileAction.h" #include "gtest/gtest.h" +#if defined(_WIN32) +#include "lldb/Host/windows/PosixApi.h" +#endif using namespace lldb_private; diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 0dcd4d90086eff..5b525c8e56ecc9 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -60,7 +60,11 @@ namespace llvm { using const_reverse_iterator = std::reverse_iterator; private: - std::string_view View; + /// The start of the string, in an external buffer. + const char *Data = nullptr; + + /// The length of the string. + size_t Length = 0; // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version @@ -82,26 +86,28 @@ namespace llvm { /// Construct a string ref from a cstring. /*implicit*/ constexpr StringRef(const char *Str LLVM_LIFETIME_BOUND) - : View(Str, Str ? + : Data(Str), Length(Str ? // GCC 7 doesn't have constexpr char_traits. Fall back to __builtin_strlen. #if defined(_GLIBCXX_RELEASE) && _GLIBCXX_RELEASE < 8 - __builtin_strlen(Str) + __builtin_strlen(Str) #else - std::char_traits::length(Str) + std::char_traits::length(Str) #endif - : 0) { + : 0) { } /// Construct a string ref from a pointer and length. /*implicit*/ constexpr StringRef(const char *data LLVM_LIFETIME_BOUND, size_t length) - : View(data, length) {} + : Data(data), Length(length) {} /// Construct a string ref from an std::string. - /*implicit*/ StringRef(const std::string &Str) : View(Str) {} + /*implicit*/ StringRef(const std::string &Str) + : Data(Str.data()), Length(Str.length()) {} /// Construct a string ref from an std::string_view. - /*implicit*/ constexpr StringRef(std::string_view Str) : View(Str) {} + /*implicit*/ constexpr StringRef(std::string_view Str) + : Data(Str.data()), Length(Str.size()) {} /// @} /// @name Iterators @@ -135,13 +141,13 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). - [[nodiscard]] constexpr const char *data() const { return View.data(); } + [[nodiscard]] constexpr const char *data() const { return Data; } /// empty - Check if the string is empty. [[nodiscard]] constexpr bool empty() const { return size() == 0; } /// size - Get the string size. - [[nodiscard]] constexpr size_t size() const { return View.size(); } + [[nodiscard]] constexpr size_t size() const { return Length; } /// front - Get the first character in the string. [[nodiscard]] char front() const { diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 9336f2a454ae47..0cbbbe823c06b5 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -728,7 +728,7 @@ HANDLE_DW_OP(0x24, shl, 0, 2, 2, DWARF) HANDLE_DW_OP(0x25, shr, 0, 2, 2, DWARF) HANDLE_DW_OP(0x26, shra, 0, 2, 2, DWARF) HANDLE_DW_OP(0x27, xor, 0, 2, 2, DWARF) -HANDLE_DW_OP(0x28, bra, 1, 0, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 1, 1, 2, DWARF) HANDLE_DW_OP(0x29, eq, 0, 2, 2, DWARF) HANDLE_DW_OP(0x2a, ge, 0, 2, 2, DWARF) HANDLE_DW_OP(0x2b, gt, 0, 2, 2, DWARF) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h index 816e94362f0262..8ce6eaa69c4ab7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CodeGen.h" @@ -177,6 +178,8 @@ class GISelInstProfileBuilder { const GISelInstProfileBuilder &addNodeIDOpcode(unsigned Opc) const; const GISelInstProfileBuilder &addNodeIDRegType(const LLT Ty) const; const GISelInstProfileBuilder &addNodeIDRegType(const Register) const; + const GISelInstProfileBuilder & + addNodeIDRegType(MachineRegisterInfo::VRegAttrs) const; const GISelInstProfileBuilder & addNodeIDRegType(const TargetRegisterClass *RC) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h index 7b42722ca8d4f1..b4ff4cd178d757 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGenTypes/LowLevelType.h" #include "llvm/IR/Function.h" +#include "llvm/Transforms/Utils/SizeOpts.h" #include #include #include @@ -635,8 +636,12 @@ class GIMatchTableExecutor { bool shouldOptForSize(const MachineFunction *MF) const { const auto &F = MF->getFunction(); - return F.hasOptSize() || F.hasMinSize() || - (PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI)); + if (F.hasOptSize()) + return true; + if (CurMBB) + if (auto *BB = CurMBB->getBasicBlock()) + return llvm::shouldOptimizeForSize(BB, PSI, BFI); + return false; } public: diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index b6309a9ea0ec78..cd7ebcf54c9e1e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -28,7 +28,7 @@ namespace llvm { class GenericMachineInstr : public MachineInstr { constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs; + FmNoNans | FmNoInfs | SameSign; public: GenericMachineInstr() = delete; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index ab3025e4923cd0..14a641512a67d6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -72,15 +72,20 @@ class DstOp { LLT LLTTy; Register Reg; const TargetRegisterClass *RC; + MachineRegisterInfo::VRegAttrs Attrs; }; public: - enum class DstType { Ty_LLT, Ty_Reg, Ty_RC }; + enum class DstType { Ty_LLT, Ty_Reg, Ty_RC, Ty_VRegAttrs }; DstOp(unsigned R) : Reg(R), Ty(DstType::Ty_Reg) {} DstOp(Register R) : Reg(R), Ty(DstType::Ty_Reg) {} DstOp(const MachineOperand &Op) : Reg(Op.getReg()), Ty(DstType::Ty_Reg) {} DstOp(const LLT T) : LLTTy(T), Ty(DstType::Ty_LLT) {} DstOp(const TargetRegisterClass *TRC) : RC(TRC), Ty(DstType::Ty_RC) {} + DstOp(MachineRegisterInfo::VRegAttrs Attrs) + : Attrs(Attrs), Ty(DstType::Ty_VRegAttrs) {} + DstOp(RegClassOrRegBank RCOrRB, LLT Ty) + : Attrs({RCOrRB, Ty}), Ty(DstType::Ty_VRegAttrs) {} void addDefToMIB(MachineRegisterInfo &MRI, MachineInstrBuilder &MIB) const { switch (Ty) { @@ -93,6 +98,9 @@ class DstOp { case DstType::Ty_RC: MIB.addDef(MRI.createVirtualRegister(RC)); break; + case DstType::Ty_VRegAttrs: + MIB.addDef(MRI.createVirtualRegister(Attrs)); + break; } } @@ -104,6 +112,8 @@ class DstOp { return LLTTy; case DstType::Ty_Reg: return MRI.getType(Reg); + case DstType::Ty_VRegAttrs: + return Attrs.Ty; } llvm_unreachable("Unrecognised DstOp::DstType enum"); } @@ -114,12 +124,13 @@ class DstOp { } const TargetRegisterClass *getRegClass() const { - switch (Ty) { - case DstType::Ty_RC: - return RC; - default: - llvm_unreachable("Not a RC Operand"); - } + assert(Ty == DstType::Ty_RC && "Not a RC Operand"); + return RC; + } + + MachineRegisterInfo::VRegAttrs getVRegAttrs() const { + assert(Ty == DstType::Ty_VRegAttrs && "Not a VRegAttrs Operand"); + return Attrs; } DstType getDstOpKind() const { return Ty; } @@ -1255,7 +1266,8 @@ class MachineIRBuilder { /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, - const SrcOp &Op0, const SrcOp &Op1); + const SrcOp &Op0, const SrcOp &Op1, + std::optional Flgs = std::nullopt); /// Build and insert a \p Res = G_FCMP \p Pred\p Op0, \p Op1 /// diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 95a8234d3c6080..4016247376c4f6 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -542,10 +542,6 @@ bool isConstFalseVal(const TargetLowering &TLI, int64_t Val, bool IsVector, /// TargetBooleanContents. int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP); -/// Returns true if the given block should be optimized for size. -bool shouldOptForSize(const MachineBasicBlock &MBB, ProfileSummaryInfo *PSI, - BlockFrequencyInfo *BFI); - using SmallInstListTy = GISelWorkList<4>; void saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver, diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 76a7b8662bae66..ead6bbe1d5f641 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -119,6 +119,7 @@ class MachineInstr Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. NoUSWrap = 1 << 20, // Instruction supports geps // no unsigned signed wrap. + SameSign = 1 << 21 // Both operands have the same sign. }; private: @@ -1764,8 +1765,8 @@ class MachineInstr bool isDereferenceableInvariantLoad() const; /// If the specified instruction is a PHI that always merges together the - /// same virtual register, return the register, otherwise return 0. - unsigned isConstantValuePHI() const; + /// same virtual register, return the register, otherwise return Register(). + Register isConstantValuePHI() const; /// Return true if this instruction has side effects that are not modeled /// by mayLoad / mayStore, etc. diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 7a2c23c13a3ce6..5dc51aaed81c7b 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -754,7 +754,7 @@ class MachineRegisterInfo { /// Returns register class or bank and low level type of \p Reg. Always safe /// to use. Special values are returned when \p Reg does not have some of the /// attributes. - VRegAttrs getVRegAttrs(Register Reg) { + VRegAttrs getVRegAttrs(Register Reg) const { return {getRegClassOrRegBank(Reg), getType(Reg)}; } diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index e12c1f076f133c..d1c71fc95818c8 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -261,11 +261,11 @@ namespace llvm { /// TailDuplicate - Duplicate blocks with unconditional branches /// into tails of their predecessors. - extern char &TailDuplicateID; + extern char &TailDuplicateLegacyID; /// Duplicate blocks with unconditional branches into tails of their /// predecessors. Variant that works before register allocation. - extern char &EarlyTailDuplicateID; + extern char &EarlyTailDuplicateLegacyID; /// MachineTraceMetrics - This pass computes critical path and CPU resource /// usage in an ensemble of traces. diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index bda0120a2df4aa..26488413fe5826 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -378,36 +378,48 @@ template<> struct simplify_type { /// the backend. struct SDNodeFlags { private: - bool NoUnsignedWrap : 1; - bool NoSignedWrap : 1; - bool Exact : 1; - bool Disjoint : 1; - bool NonNeg : 1; - bool NoNaNs : 1; - bool NoInfs : 1; - bool NoSignedZeros : 1; - bool AllowReciprocal : 1; - bool AllowContract : 1; - bool ApproximateFuncs : 1; - bool AllowReassociation : 1; - - // We assume instructions do not raise floating-point exceptions by default, - // and only those marked explicitly may do so. We could choose to represent - // this via a positive "FPExcept" flags like on the MI level, but having a - // negative "NoFPExcept" flag here makes the flag intersection logic more - // straightforward. - bool NoFPExcept : 1; - // Instructions with attached 'unpredictable' metadata on IR level. - bool Unpredictable : 1; + friend class SDNode; + + unsigned Flags = 0; + + template void setFlag(bool B) { + Flags = (Flags & ~Flag) | (B ? Flag : 0); + } public: + enum : unsigned { + None = 0, + NoUnsignedWrap = 1 << 0, + NoSignedWrap = 1 << 1, + Exact = 1 << 2, + Disjoint = 1 << 3, + NonNeg = 1 << 4, + NoNaNs = 1 << 5, + NoInfs = 1 << 6, + NoSignedZeros = 1 << 7, + AllowReciprocal = 1 << 8, + AllowContract = 1 << 9, + ApproximateFuncs = 1 << 10, + AllowReassociation = 1 << 11, + + // We assume instructions do not raise floating-point exceptions by default, + // and only those marked explicitly may do so. We could choose to represent + // this via a positive "FPExcept" flags like on the MI level, but having a + // negative "NoFPExcept" flag here makes the flag intersection logic more + // straightforward. + NoFPExcept = 1 << 12, + // Instructions with attached 'unpredictable' metadata on IR level. + Unpredictable = 1 << 13, + + // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below + // the class definition when adding new flags. + + PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint | + NonNeg | NoNaNs | NoInfs, + }; + /// Default constructor turns off all optimization flags. - SDNodeFlags() - : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), - Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false), - NoSignedZeros(false), AllowReciprocal(false), AllowContract(false), - ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false), - Unpredictable(false) {} + SDNodeFlags() : Flags(0) {} /// Propagate the fast-math-flags from an IR FPMathOperator. void copyFMF(const FPMathOperator &FPMO) { @@ -421,71 +433,49 @@ struct SDNodeFlags { } // These are mutators for each flag. - void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; } - void setNoSignedWrap(bool b) { NoSignedWrap = b; } - void setExact(bool b) { Exact = b; } - void setDisjoint(bool b) { Disjoint = b; } - void setNonNeg(bool b) { NonNeg = b; } - void setNoNaNs(bool b) { NoNaNs = b; } - void setNoInfs(bool b) { NoInfs = b; } - void setNoSignedZeros(bool b) { NoSignedZeros = b; } - void setAllowReciprocal(bool b) { AllowReciprocal = b; } - void setAllowContract(bool b) { AllowContract = b; } - void setApproximateFuncs(bool b) { ApproximateFuncs = b; } - void setAllowReassociation(bool b) { AllowReassociation = b; } - void setNoFPExcept(bool b) { NoFPExcept = b; } - void setUnpredictable(bool b) { Unpredictable = b; } + void setNoUnsignedWrap(bool b) { setFlag(b); } + void setNoSignedWrap(bool b) { setFlag(b); } + void setExact(bool b) { setFlag(b); } + void setDisjoint(bool b) { setFlag(b); } + void setNonNeg(bool b) { setFlag(b); } + void setNoNaNs(bool b) { setFlag(b); } + void setNoInfs(bool b) { setFlag(b); } + void setNoSignedZeros(bool b) { setFlag(b); } + void setAllowReciprocal(bool b) { setFlag(b); } + void setAllowContract(bool b) { setFlag(b); } + void setApproximateFuncs(bool b) { setFlag(b); } + void setAllowReassociation(bool b) { setFlag(b); } + void setNoFPExcept(bool b) { setFlag(b); } + void setUnpredictable(bool b) { setFlag(b); } // These are accessors for each flag. - bool hasNoUnsignedWrap() const { return NoUnsignedWrap; } - bool hasNoSignedWrap() const { return NoSignedWrap; } - bool hasExact() const { return Exact; } - bool hasDisjoint() const { return Disjoint; } - bool hasNonNeg() const { return NonNeg; } - bool hasNoNaNs() const { return NoNaNs; } - bool hasNoInfs() const { return NoInfs; } - bool hasNoSignedZeros() const { return NoSignedZeros; } - bool hasAllowReciprocal() const { return AllowReciprocal; } - bool hasAllowContract() const { return AllowContract; } - bool hasApproximateFuncs() const { return ApproximateFuncs; } - bool hasAllowReassociation() const { return AllowReassociation; } - bool hasNoFPExcept() const { return NoFPExcept; } - bool hasUnpredictable() const { return Unpredictable; } + bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; } + bool hasNoSignedWrap() const { return Flags & NoSignedWrap; } + bool hasExact() const { return Flags & Exact; } + bool hasDisjoint() const { return Flags & Disjoint; } + bool hasNonNeg() const { return Flags & NonNeg; } + bool hasNoNaNs() const { return Flags & NoNaNs; } + bool hasNoInfs() const { return Flags & NoInfs; } + bool hasNoSignedZeros() const { return Flags & NoSignedZeros; } + bool hasAllowReciprocal() const { return Flags & AllowReciprocal; } + bool hasAllowContract() const { return Flags & AllowContract; } + bool hasApproximateFuncs() const { return Flags & ApproximateFuncs; } + bool hasAllowReassociation() const { return Flags & AllowReassociation; } + bool hasNoFPExcept() const { return Flags & NoFPExcept; } + bool hasUnpredictable() const { return Flags & Unpredictable; } bool operator==(const SDNodeFlags &Other) const { - return NoUnsignedWrap == Other.NoUnsignedWrap && - NoSignedWrap == Other.NoSignedWrap && Exact == Other.Exact && - Disjoint == Other.Disjoint && NonNeg == Other.NonNeg && - NoNaNs == Other.NoNaNs && NoInfs == Other.NoInfs && - NoSignedZeros == Other.NoSignedZeros && - AllowReciprocal == Other.AllowReciprocal && - AllowContract == Other.AllowContract && - ApproximateFuncs == Other.ApproximateFuncs && - AllowReassociation == Other.AllowReassociation && - NoFPExcept == Other.NoFPExcept && - Unpredictable == Other.Unpredictable; + return Flags == Other.Flags; } /// Clear any flags in this flag set that aren't also set in Flags. All /// flags will be cleared if Flags are undefined. - void intersectWith(const SDNodeFlags Flags) { - NoUnsignedWrap &= Flags.NoUnsignedWrap; - NoSignedWrap &= Flags.NoSignedWrap; - Exact &= Flags.Exact; - Disjoint &= Flags.Disjoint; - NonNeg &= Flags.NonNeg; - NoNaNs &= Flags.NoNaNs; - NoInfs &= Flags.NoInfs; - NoSignedZeros &= Flags.NoSignedZeros; - AllowReciprocal &= Flags.AllowReciprocal; - AllowContract &= Flags.AllowContract; - ApproximateFuncs &= Flags.ApproximateFuncs; - AllowReassociation &= Flags.AllowReassociation; - NoFPExcept &= Flags.NoFPExcept; - Unpredictable &= Flags.Unpredictable; - } + void intersectWith(const SDNodeFlags Flags) { this->Flags &= Flags.Flags; } }; +LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None), + SDNodeFlags::Unpredictable); + /// Represents one node in the SelectionDAG. /// class SDNode : public FoldingSetNode, public ilist_node { @@ -1029,10 +1019,7 @@ END_TWO_BYTE_PACK() void intersectFlagsWith(const SDNodeFlags Flags); bool hasPoisonGeneratingFlags() const { - SDNodeFlags Flags = getFlags(); - return Flags.hasNoUnsignedWrap() || Flags.hasNoSignedWrap() || - Flags.hasExact() || Flags.hasDisjoint() || Flags.hasNonNeg() || - Flags.hasNoNaNs() || Flags.hasNoInfs(); + return Flags.Flags & SDNodeFlags::PoisonGeneratingFlags; } void setCFIType(uint32_t Type) { CFIType = Type; } diff --git a/llvm/include/llvm/CodeGen/TailDuplication.h b/llvm/include/llvm/CodeGen/TailDuplication.h new file mode 100644 index 00000000000000..687a592ccf2fbf --- /dev/null +++ b/llvm/include/llvm/CodeGen/TailDuplication.h @@ -0,0 +1,47 @@ +//===- llvm/CodeGen/TailDuplication.h ---------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_TAILDUPLICATIONPASS_H +#define LLVM_CODEGEN_TAILDUPLICATIONPASS_H + +#include "llvm/CodeGen/MBFIWrapper.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +template +class TailDuplicatePassBase : public PassInfoMixin { +private: + std::unique_ptr MBFIW; + +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + +class EarlyTailDuplicatePass + : public TailDuplicatePassBase { +public: + MachineFunctionProperties getClearedProperties() const { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoPHIs); + } +}; + +class TailDuplicatePass + : public TailDuplicatePassBase {}; + +} // namespace llvm + +extern template class llvm::TailDuplicatePassBase; +extern template class llvm::TailDuplicatePassBase; + +#endif // LLVM_CODEGEN_TAILDUPLICATIONPASS_H diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h index 3db6f33a8093f0..4de109739227ad 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -230,7 +230,8 @@ namespace llvm { /// Return true if this is an overloaded type for TableGen. bool isOverloaded() const { - return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); + return (V == MVT::iAny || V == MVT::fAny || V == MVT::vAny || + V == MVT::pAny); } /// Return true if the bit size is a multiple of 8. diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 493c0cfcab60ce..6d6b92958b4321 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -338,9 +338,9 @@ def MetadataVT : ValueType<0, 505> { // Metadata let LLVMName = "Metadata"; } -// Pseudo valuetype mapped to the current pointer size to any address space. +// Pseudo valuetype to represent "pointer to any address space" // Should only be used in TableGen. -def iPTRAny : VTAny<506>; +def pAny : VTAny<506>; // Pseudo valuetype to represent "vector of any size" // Should only be used in TableGen. diff --git a/llvm/include/llvm/CodeGenTypes/MachineValueType.h b/llvm/include/llvm/CodeGenTypes/MachineValueType.h index c9a5098ef1623e..5c47ad4824a791 100644 --- a/llvm/include/llvm/CodeGenTypes/MachineValueType.h +++ b/llvm/include/llvm/CodeGenTypes/MachineValueType.h @@ -320,7 +320,7 @@ namespace llvm { llvm_unreachable("Value type is non-standard value, Other."); case iPTR: llvm_unreachable("Value type size is target-dependent. Ask TLI."); - case iPTRAny: + case pAny: case iAny: case fAny: case vAny: diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h index ef42cc5f798fd9..8a4740c1dd9cb9 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/JITLinkRedirectableSymbolManager.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H -#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLEMANAGER_H +#ifndef LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H +#define LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/Orc/RedirectionManager.h" @@ -103,4 +103,4 @@ class JITLinkRedirectableSymbolManager : public RedirectableSymbolManager, } // namespace orc } // namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_JITLINKREDIRECABLESYMBOLMANAGER_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h b/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h index 4adc3efad55730..cd185d54b2e7c8 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/ReOptimizeLayer.h @@ -178,4 +178,4 @@ class ReOptimizeLayer : public IRLayer, public ResourceManager { } // namespace orc } // namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_ORC_REOPTIMIZELAYER_H diff --git a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h index 2a890905dc6323..8ff15b51f1abdf 100644 --- a/llvm/include/llvm/Frontend/OpenMP/ClauseT.h +++ b/llvm/include/llvm/Frontend/OpenMP/ClauseT.h @@ -239,7 +239,8 @@ struct MapperT { ENUM(MemoryOrder, AcqRel, Acquire, Relaxed, Release, SeqCst); ENUM(MotionExpectation, Present); // V5.2: [15.9.1] `task-dependence-type` modifier -ENUM(TaskDependenceType, In, Out, Inout, Mutexinoutset, Inoutset, Depobj); +ENUM(TaskDependenceType, Depobj, In, Inout, Inoutset, Mutexinoutset, Out, Sink, + Source); template // struct LoopIterationT { diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 70179bab475779..97496d4aae5ae2 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -130,6 +130,8 @@ def OMPC_Depobj : Clause<"depobj"> { } def OMPC_Destroy : Clause<"destroy"> { let clangClass = "OMPDestroyClause"; + let flangClass = "OmpDestroyClause"; + let isValueOptional = true; } def OMPC_Detach : Clause<"detach"> { let clangClass = "OMPDetachClause"; @@ -481,6 +483,7 @@ def OMPC_Untied : Clause<"untied"> { } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; + let flangClass = "OmpUpdateClause"; } def OMPC_Use : Clause<"use"> { let clangClass = "OMPUseClause"; diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index e893295e3272b9..89dfff256e0c43 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -92,7 +92,7 @@ namespace Intrinsic { /// return the existing declaration. /// /// The \p Tys parameter is for intrinsics with overloaded types (e.g., those - /// using iAny, fAny, vAny, or iPTRAny). For a declaration of an overloaded + /// using iAny, fAny, vAny, or pAny). For a declaration of an overloaded /// intrinsic, Tys must provide exactly one type for each overloaded type in /// the intrinsic. Function *getOrInsertDeclaration(Module *M, ID id, ArrayRef Tys = {}); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e91758ed34eb38..8ed57f818d6006 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -388,7 +388,7 @@ class LLVMAnyType : LLVMType { !eq(vt, iAny) : ArgKind.AnyInteger, !eq(vt, fAny) : ArgKind.AnyFloat, !eq(vt, vAny) : ArgKind.AnyVector, - !eq(vt, iPTRAny) : ArgKind.AnyPointer, + !eq(vt, pAny) : ArgKind.AnyPointer, ); let Sig = [ IIT_ARG.Number, @@ -412,8 +412,8 @@ class LLVMQualPointerType ]); } -class LLVMAnyPointerType : LLVMAnyType { - assert isAny, "iPTRAny should have isOverloaded"; +class LLVMAnyPointerType : LLVMAnyType { + assert isAny, "pAny should have isOverloaded"; } // Match the type of another intrinsic parameter. Number is an index into the diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 6df2eb156a0774..ddb47390537412 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -87,6 +87,7 @@ let TargetPrefix = "spv" in { def int_spv_wave_readlane : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrConvergent, IntrNoMem]>; def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; + def int_spv_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>; // Create resource handle given the binding information. Returns a // type appropriate for the kind of resource given the set id, binding id, diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 26f5d63553c5a8..54c070401ec8a4 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -101,7 +101,7 @@ void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &); void initializeEarlyIfConverterLegacyPass(PassRegistry &); void initializeEarlyIfPredicatorPass(PassRegistry &); void initializeEarlyMachineLICMPass(PassRegistry &); -void initializeEarlyTailDuplicatePass(PassRegistry &); +void initializeEarlyTailDuplicateLegacyPass(PassRegistry &); void initializeEdgeBundlesPass(PassRegistry &); void initializeEHContGuardCatchretPass(PassRegistry &); void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry &); @@ -300,7 +300,7 @@ void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &); void initializeStripDebugMachineModulePass(PassRegistry &); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry &); -void initializeTailDuplicatePass(PassRegistry &); +void initializeTailDuplicateLegacyPass(PassRegistry &); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &); void initializeTargetPassConfigPass(PassRegistry &); void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); diff --git a/llvm/include/llvm/MC/MCInstPrinter.h b/llvm/include/llvm/MC/MCInstPrinter.h index 60a901e3d0deae..e825c04a6dba6f 100644 --- a/llvm/include/llvm/MC/MCInstPrinter.h +++ b/llvm/include/llvm/MC/MCInstPrinter.h @@ -9,8 +9,10 @@ #ifndef LLVM_MC_MCINSTPRINTER_H #define LLVM_MC_MCINSTPRINTER_H +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { @@ -24,7 +26,6 @@ class MCRegister; class MCRegisterInfo; class MCSubtargetInfo; class StringRef; -class raw_ostream; /// Convert `Bytes' to a hex string and output to `OS' void dumpBytes(ArrayRef Bytes, raw_ostream &OS); @@ -76,6 +77,8 @@ class MCInstPrinter { /// If true, symbolize branch target and memory reference operands. bool SymbolizeOperands = false; + SmallVector ColorStack{raw_ostream::Colors::RESET}; + /// Utility function for printing annotations. void printAnnotation(raw_ostream &OS, StringRef Annot); @@ -98,8 +101,8 @@ class MCInstPrinter { class WithMarkup { public: - LLVM_CTOR_NODISCARD WithMarkup(raw_ostream &OS, Markup M, bool EnableMarkup, - bool EnableColor); + LLVM_CTOR_NODISCARD WithMarkup(MCInstPrinter &IP, raw_ostream &OS, Markup M, + bool EnableMarkup, bool EnableColor); ~WithMarkup(); template WithMarkup &operator<<(T &O) { @@ -113,6 +116,7 @@ class MCInstPrinter { } private: + MCInstPrinter &IP; raw_ostream &OS; bool EnableMarkup; bool EnableColor; @@ -144,7 +148,7 @@ class MCInstPrinter { StringRef getOpcodeName(unsigned Opcode) const; /// Print the assembler register name. - virtual void printRegName(raw_ostream &OS, MCRegister Reg) const; + virtual void printRegName(raw_ostream &OS, MCRegister Reg); bool getUseMarkup() const { return UseMarkup; } void setUseMarkup(bool Value) { UseMarkup = Value; } @@ -152,7 +156,7 @@ class MCInstPrinter { bool getUseColor() const { return UseColor; } void setUseColor(bool Value) { UseColor = Value; } - WithMarkup markup(raw_ostream &OS, Markup M) const; + WithMarkup markup(raw_ostream &OS, Markup M); bool getPrintImmHex() const { return PrintImmHex; } void setPrintImmHex(bool Value) { PrintImmHex = Value; } diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index faa72d5f3144c4..70fba69778536e 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -206,7 +206,7 @@ class MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; /// Emit a note at the location \p L, with the message \p Msg. virtual void Note(SMLoc L, const Twine &Msg, diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index ad80c661147d6f..9e95625fd1d881 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -60,6 +60,7 @@ #include "llvm/CodeGen/SjLjEHPrepare.h" #include "llvm/CodeGen/StackColoring.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TwoAddressInstructionPass.h" #include "llvm/CodeGen/UnreachableBlockElim.h" diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index 4f32a917738c13..9d12a120ff7ac6 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -133,6 +133,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", VirtRegMapAnalysis()) MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass()) MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass()) MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass()) +MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass()) MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass()) MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass()) MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass()) @@ -157,6 +158,7 @@ MACHINE_FUNCTION_PASS("print", VirtRegMapPrinterPass(dbgs())) MACHINE_FUNCTION_PASS("require-all-machine-function-properties", RequireAllMachineFunctionPropertiesPass()) MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass()) +MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass()) MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass()) MACHINE_FUNCTION_PASS("verify", MachineVerifierPass()) @@ -210,7 +212,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass) DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass) DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass) DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter) -DUMMY_MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass) DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass) DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", FixupStatepointCallerSavedPass) DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass) @@ -262,7 +263,6 @@ DUMMY_MACHINE_FUNCTION_PASS("simple-register-coalescing", RegisterCoalescerPass) DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass) DUMMY_MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass) DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass) -DUMMY_MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass) DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass) DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass) DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass) diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index b8b6c684717b05..559549b0a22cc9 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -78,12 +78,20 @@ class InstrProfWriter { // Whether to serialize the full schema. bool MemProfFullSchema; + // Whether to generated random memprof hotness for testing. + bool MemprofGenerateRandomHotness; + public: + // For memprof testing, random hotness can be assigned to the contexts if + // MemprofGenerateRandomHotness is enabled. The random seed can be either + // provided by MemprofGenerateRandomHotnessSeed, or if that is 0, one will be + // generated in the writer using the current time. InstrProfWriter( bool Sparse = false, uint64_t TemporalProfTraceReservoirSize = 0, uint64_t MaxTemporalProfTraceLength = 0, bool WritePrevVersion = false, memprof::IndexedVersion MemProfVersionRequested = memprof::Version0, - bool MemProfFullSchema = false); + bool MemProfFullSchema = false, bool MemprofGenerateRandomHotness = false, + unsigned MemprofGenerateRandomHotnessSeed = 0); ~InstrProfWriter(); StringMap &getProfileData() { return FunctionData; } diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index f8121d35732518..da2cc807370095 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -147,6 +147,15 @@ struct PortableMemInfoBlock { return Name; \ } #include "llvm/ProfileData/MIBEntryDef.inc" +#undef MIBEntryDef + + // Define setters for each type which can be called by the writer. +#define MIBEntryDef(NameTag, Name, Type) \ + void set##Name(Type NewVal) { \ + assert(Schema[llvm::to_underlying(Meta::Name)]); \ + Name = NewVal; \ + } +#include "llvm/ProfileData/MIBEntryDef.inc" #undef MIBEntryDef void clear() { *this = PortableMemInfoBlock(); } diff --git a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc index 902fa8f79ab816..e454524c9cb6a2 100644 --- a/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc +++ b/llvm/include/llvm/TargetParser/AArch64CPUFeatures.inc @@ -53,9 +53,9 @@ enum CPUFeatures { FEAT_EBF16, FEAT_RPRES, FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, + RESERVED_FEAT_SVE_BF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_EBF16, // previously used and now ABI legacy + RESERVED_FEAT_SVE_I8MM, // previously used and now ABI legacy FEAT_SVE_F32MM, FEAT_SVE_F64MM, FEAT_SVE2, diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index a4be24e32c5279..1e8ef0102450e4 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -175,6 +175,14 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueMapTypeRemapper *TypeMapper = nullptr, ValueMaterializer *Materializer = nullptr); +/// Clone OldFunc's attributes into NewFunc, transforming values based on the +/// mappings in VMap. +void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ValueMapTypeRemapper *TypeMapper = nullptr, + ValueMaterializer *Materializer = nullptr); + void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h index 49dcec26dbc559..77ba5cd7f002e9 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Legality.h @@ -32,6 +32,7 @@ enum class ResultReason { DiffOpcodes, DiffTypes, DiffMathFlags, + DiffWrapFlags, }; #ifndef NDEBUG @@ -56,6 +57,8 @@ struct ToStr { return "DiffTypes"; case ResultReason::DiffMathFlags: return "DiffMathFlags"; + case ResultReason::DiffWrapFlags: + return "DiffWrapFlags"; } llvm_unreachable("Unknown ResultReason enum"); } diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 759db6db60774c..56abd03d623541 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -60,8 +60,8 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { uint64_t OldRes = Result; Result *= 10; Result += *Buffer-'0'; - if (Result < OldRes) { // Uh, oh, overflow detected!!! - LexError("constant bigger than 64 bits detected!"); + if (Result < OldRes) { // overflow detected. + LexError("constant bigger than 64 bits detected"); return 0; } } @@ -75,8 +75,8 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { Result *= 16; Result += hexDigitValue(*Buffer); - if (Result < OldRes) { // Uh, oh, overflow detected!!! - LexError("constant bigger than 64 bits detected!"); + if (Result < OldRes) { // overflow detected. + LexError("constant bigger than 64 bits detected"); return 0; } } @@ -99,7 +99,7 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) - LexError("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected"); } /// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into @@ -118,7 +118,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) - LexError("constant bigger than 128 bits detected!"); + LexError("constant bigger than 128 bits detected"); } // UnEscapeLexed - Run through the specified buffer and change \xx codes to the @@ -292,7 +292,7 @@ lltok::Kind LLLexer::LexDollar() { StrVal.assign(TokStart + 2, CurPtr - 1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); return lltok::Error; } return lltok::ComdatVar; @@ -354,7 +354,7 @@ lltok::Kind LLLexer::LexUIntID(lltok::Kind Token) { uint64_t Val = atoull(TokStart + 1, CurPtr); if ((unsigned)Val != Val) - LexError("invalid value number (too large)!"); + LexError("invalid value number (too large)"); UIntVal = unsigned(Val); return Token; } @@ -375,7 +375,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { StrVal.assign(TokStart+2, CurPtr-1); UnEscapeLexed(StrVal); if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); return lltok::Error; } return Var; @@ -410,7 +410,7 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; if (StringRef(StrVal).contains(0)) { - LexError("Null bytes are not allowed in names"); + LexError("NUL character is not allowed in names"); kind = lltok::Error; } else { kind = lltok::LabelStr; @@ -492,7 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { uint64_t NumBits = atoull(StartChar, CurPtr); if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) { - LexError("bitwidth for integer type out of range!"); + LexError("bitwidth for integer type out of range"); return lltok::Error; } TyVal = IntegerType::get(Context, NumBits); @@ -1122,7 +1122,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { uint64_t Val = atoull(TokStart, CurPtr); ++CurPtr; // Skip the colon. if ((unsigned)Val != Val) - LexError("invalid value number (too large)!"); + LexError("invalid value number (too large)"); UIntVal = unsigned(Val); return lltok::LabelID; } diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index cf5c35fe81b4c7..39fba1d0b527ef 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -38,7 +38,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeEarlyIfConverterLegacyPass(Registry); initializeEarlyIfPredicatorPass(Registry); initializeEarlyMachineLICMPass(Registry); - initializeEarlyTailDuplicatePass(Registry); + initializeEarlyTailDuplicateLegacyPass(Registry); initializeExpandLargeDivRemLegacyPassPass(Registry); initializeExpandLargeFpConvertLegacyPassPass(Registry); initializeExpandMemCmpLegacyPassPass(Registry); @@ -131,7 +131,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeStripDebugMachineModulePass(Registry); - initializeTailDuplicatePass(Registry); + initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionLegacyPassPass(Registry); initializeTypePromotionLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index ca4d0986b4426c..cfb4ae85aa4f77 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -356,6 +356,20 @@ GISelInstProfileBuilder::addNodeIDRegType(const RegisterBank *RB) const { return *this; } +const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDRegType( + MachineRegisterInfo::VRegAttrs Attrs) const { + addNodeIDRegType(Attrs.Ty); + + const RegClassOrRegBank &RCOrRB = Attrs.RCOrRB; + if (RCOrRB) { + if (const auto *RB = dyn_cast_if_present(RCOrRB)) + addNodeIDRegType(RB); + else + addNodeIDRegType(cast(RCOrRB)); + } + return *this; +} + const GISelInstProfileBuilder & GISelInstProfileBuilder::addNodeIDImmediate(int64_t Imm) const { ID.AddInteger(Imm); @@ -389,17 +403,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { const GISelInstProfileBuilder & GISelInstProfileBuilder::addNodeIDReg(Register Reg) const { - LLT Ty = MRI.getType(Reg); - if (Ty.isValid()) - addNodeIDRegType(Ty); - - if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) { - if (const auto *RB = dyn_cast_if_present(RCOrRB)) - addNodeIDRegType(RB); - else if (const auto *RC = - dyn_cast_if_present(RCOrRB)) - addNodeIDRegType(RC); - } + addNodeIDRegType(MRI.getVRegAttrs(Reg)); return *this; } diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 547529bbe699ab..bf8e847011d7c1 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -73,18 +73,24 @@ bool CSEMIRBuilder::canPerformCSEForOpc(unsigned Opc) const { void CSEMIRBuilder::profileDstOp(const DstOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getDstOpKind()) { - case DstOp::DstType::Ty_RC: + case DstOp::DstType::Ty_RC: { B.addNodeIDRegType(Op.getRegClass()); break; + } case DstOp::DstType::Ty_Reg: { // Regs can have LLT&(RB|RC). If those exist, profile them as well. B.addNodeIDReg(Op.getReg()); break; } - default: + case DstOp::DstType::Ty_LLT: { B.addNodeIDRegType(Op.getLLTTy(*getMRI())); break; } + case DstOp::DstType::Ty_VRegAttrs: { + B.addNodeIDRegType(Op.getVRegAttrs()); + break; + } + } } void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 5381dce58f9e65..a87754389cc8ed 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -340,20 +340,17 @@ bool IRTranslator::translateCompare(const User &U, Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); CmpInst::Predicate Pred = CI->getPredicate(); + uint32_t Flags = MachineInstr::copyFlagsFromInstruction(*CI); if (CmpInst::isIntPredicate(Pred)) - MIRBuilder.buildICmp(Pred, Res, Op0, Op1); + MIRBuilder.buildICmp(Pred, Res, Op0, Op1, Flags); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); - else { - uint32_t Flags = 0; - if (CI) - Flags = MachineInstr::copyFlagsFromInstruction(*CI); + else MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags); - } return true; } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 59f2fc633f5de7..15b9164247846c 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -898,8 +898,9 @@ MachineIRBuilder::buildFPTrunc(const DstOp &Res, const SrcOp &Op, MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { - return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}); + const SrcOp &Op1, + std::optional Flags) { + return buildInstr(TargetOpcode::G_ICMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 513a49b4fc2e4d..dcbbb0871a8445 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -1619,11 +1619,6 @@ int64_t llvm::getICmpTrueVal(const TargetLowering &TLI, bool IsVector, llvm_unreachable("Invalid boolean contents"); } -bool llvm::shouldOptForSize(const MachineBasicBlock &MBB, - ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) { - return llvm::shouldOptimizeForSize(MBB.getBasicBlock(), PSI, BFI); -} - void llvm::saveUsesAndErase(MachineInstr &MI, MachineRegisterInfo &MRI, LostDebugLocObserver *LocObserver, SmallInstListTy &DeadInstChain) { diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 5a3806ce57335a..1c450b05f49e93 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -216,6 +216,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("exact", MIToken::kw_exact) .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) + .Case("samesign", MIToken::kw_samesign) .Case("nofpexcept", MIToken::kw_nofpexcept) .Case("unpredictable", MIToken::kw_unpredictable) .Case("debug-location", MIToken::kw_debug_location) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 3931da3eaae1d3..d7cd06759cfbb8 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -77,6 +77,7 @@ struct MIToken { kw_unpredictable, kw_nneg, kw_disjoint, + kw_samesign, kw_debug_location, kw_debug_instr_number, kw_dbg_instr_ref, diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp index 45847b5830da65..059814c70f828d 100644 --- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -1476,7 +1476,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Token.is(MIToken::kw_noconvergent) || Token.is(MIToken::kw_unpredictable) || Token.is(MIToken::kw_nneg) || - Token.is(MIToken::kw_disjoint)) { + Token.is(MIToken::kw_disjoint) || + Token.is(MIToken::kw_samesign)) { // clang-format on // Mine frame and fast math flags if (Token.is(MIToken::kw_frame_setup)) @@ -1513,6 +1514,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { Flags |= MachineInstr::NonNeg; if (Token.is(MIToken::kw_disjoint)) Flags |= MachineInstr::Disjoint; + if (Token.is(MIToken::kw_samesign)) + Flags |= MachineInstr::SameSign; lex(); } diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index a015cd3c2a55f9..658bbe0e577e5c 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -837,6 +837,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "disjoint "; if (MI.getFlag(MachineInstr::NoUSWrap)) OS << "nusw "; + if (MI.getFlag(MachineInstr::SameSign)) + OS << "samesign "; OS << TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 0d78c2cafbaf63..941861da5c5693 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -596,6 +596,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::Disjoint; } + // Copy the samesign flag. + if (const ICmpInst *ICmp = dyn_cast(&I)) + if (ICmp->hasSameSign()) + MIFlags |= MachineInstr::MIFlag::SameSign; + // Copy the exact flag. if (const PossiblyExactOperator *PE = dyn_cast(&I)) if (PE->isExact()) @@ -1535,19 +1540,16 @@ bool MachineInstr::isDereferenceableInvariantLoad() const { return true; } -/// isConstantValuePHI - If the specified instruction is a PHI that always -/// merges together the same virtual register, return the register, otherwise -/// return 0. -unsigned MachineInstr::isConstantValuePHI() const { +Register MachineInstr::isConstantValuePHI() const { if (!isPHI()) - return 0; + return {}; assert(getNumOperands() >= 3 && "It's illegal to have a PHI without source operands"); Register Reg = getOperand(1).getReg(); for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) if (getOperand(i).getReg() != Reg) - return 0; + return {}; return Reg; } @@ -1773,6 +1775,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, OS << "nneg "; if (getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (getFlag(MachineInstr::SameSign)) + OS << "samesign "; // Print the opcode name. if (TII) diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index c7a673b12d8c50..f0a136751bbffa 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -201,7 +201,7 @@ Register MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB, // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. - if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + if (Register ConstVal = InsertedPHI->isConstantValuePHI()) { InsertedPHI->eraseFromParent(); return ConstVal; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b800204d917503..ceaf5d664131c3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7355,7 +7355,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y))) // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y))) - SDValue X, Y, NotY; + SDValue X, Y, Z, NotY; for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE}) if (sd_match(N, m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) && @@ -7364,6 +7364,15 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT)); + // Fold (and X, (rot (not Y), Z)) -> (and X, (not (rot Y, Z))) + for (unsigned Opc : {ISD::ROTL, ISD::ROTR}) + if (sd_match(N, m_And(m_Value(X), + m_OneUse(m_BinOp(Opc, m_Value(NotY), m_Value(Z))))) && + sd_match(NotY, m_Not(m_Value(Y))) && + (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse())) + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y, Z), VT)); + // Masking the negated extension of a boolean is just the zero-extended // boolean: // and (sub 0, zext(bool X)), 1 --> zext(bool X) diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp index 25f20d9c899bb0..b698ca675b65e2 100644 --- a/llvm/lib/CodeGen/TailDuplication.cpp +++ b/llvm/lib/CodeGen/TailDuplication.cpp @@ -12,13 +12,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/CodeGen/TailDuplicator.h" +#include "llvm/IR/Analysis.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/PassRegistry.h" @@ -29,13 +32,13 @@ using namespace llvm; namespace { -class TailDuplicateBase : public MachineFunctionPass { +class TailDuplicateBaseLegacy : public MachineFunctionPass { TailDuplicator Duplicator; std::unique_ptr MBFIW; bool PreRegAlloc; public: - TailDuplicateBase(char &PassID, bool PreRegAlloc) - : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} + TailDuplicateBaseLegacy(char &PassID, bool PreRegAlloc) + : MachineFunctionPass(PassID), PreRegAlloc(PreRegAlloc) {} bool runOnMachineFunction(MachineFunction &MF) override; @@ -47,19 +50,19 @@ class TailDuplicateBase : public MachineFunctionPass { } }; -class TailDuplicate : public TailDuplicateBase { +class TailDuplicateLegacy : public TailDuplicateBaseLegacy { public: static char ID; - TailDuplicate() : TailDuplicateBase(ID, false) { - initializeTailDuplicatePass(*PassRegistry::getPassRegistry()); + TailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, false) { + initializeTailDuplicateLegacyPass(*PassRegistry::getPassRegistry()); } }; -class EarlyTailDuplicate : public TailDuplicateBase { +class EarlyTailDuplicateLegacy : public TailDuplicateBaseLegacy { public: static char ID; - EarlyTailDuplicate() : TailDuplicateBase(ID, true) { - initializeEarlyTailDuplicatePass(*PassRegistry::getPassRegistry()); + EarlyTailDuplicateLegacy() : TailDuplicateBaseLegacy(ID, true) { + initializeEarlyTailDuplicateLegacyPass(*PassRegistry::getPassRegistry()); } MachineFunctionProperties getClearedProperties() const override { @@ -70,17 +73,18 @@ class EarlyTailDuplicate : public TailDuplicateBase { } // end anonymous namespace -char TailDuplicate::ID; -char EarlyTailDuplicate::ID; +char TailDuplicateLegacy::ID; +char EarlyTailDuplicateLegacy::ID; -char &llvm::TailDuplicateID = TailDuplicate::ID; -char &llvm::EarlyTailDuplicateID = EarlyTailDuplicate::ID; +char &llvm::TailDuplicateLegacyID = TailDuplicateLegacy::ID; +char &llvm::EarlyTailDuplicateLegacyID = EarlyTailDuplicateLegacy::ID; -INITIALIZE_PASS(TailDuplicate, DEBUG_TYPE, "Tail Duplication", false, false) -INITIALIZE_PASS(EarlyTailDuplicate, "early-tailduplication", +INITIALIZE_PASS(TailDuplicateLegacy, DEBUG_TYPE, "Tail Duplication", false, + false) +INITIALIZE_PASS(EarlyTailDuplicateLegacy, "early-tailduplication", "Early Tail Duplication", false, false) -bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { +bool TailDuplicateBaseLegacy::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -100,3 +104,36 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) { return MadeChange; } + +template +PreservedAnalyses TailDuplicatePassBase::run( + MachineFunction &MF, MachineFunctionAnalysisManager &MFAM) { + MFPropsModifier _(static_cast(*this), MF); + + if (MF.getFunction().hasOptNone()) + return PreservedAnalyses::all(); + + auto *MBPI = &MFAM.getResult(MF); + auto *PSI = MFAM.getResult(MF) + .getCachedResult( + *MF.getFunction().getParent()); + auto *MBFI = (PSI && PSI->hasProfileSummary() + ? &MFAM.getResult(MF) + : nullptr); + if (MBFI) + MBFIW = std::make_unique(*MBFI); + + TailDuplicator Duplicator; + Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI, + /*LayoutMode=*/false); + bool MadeChange = false; + while (Duplicator.tailDuplicateBlocks()) + MadeChange = true; + + if (!MadeChange) + return PreservedAnalyses::all(); + return getMachineFunctionPassPreservedAnalyses(); +} + +template class llvm::TailDuplicatePassBase; +template class llvm::TailDuplicatePassBase; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 12225c9946e9fc..aff74104006e5a 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -290,10 +290,10 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID, if (StandardID == &BranchFolderPassID) return applyDisable(TargetID, DisableBranchFold); - if (StandardID == &TailDuplicateID) + if (StandardID == &TailDuplicateLegacyID) return applyDisable(TargetID, DisableTailDuplicate); - if (StandardID == &EarlyTailDuplicateID) + if (StandardID == &EarlyTailDuplicateLegacyID) return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) @@ -1279,7 +1279,7 @@ void TargetPassConfig::addMachinePasses() { /// Add passes that optimize machine instructions in SSA form. void TargetPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - addPass(&EarlyTailDuplicateID); + addPass(&EarlyTailDuplicateLegacyID); // Optimize PHIs before DCE: removing dead PHI cycles may make more // instructions dead. @@ -1507,7 +1507,7 @@ void TargetPassConfig::addMachineLateOptimization() { // performance for targets that require Structured Control Flow. // In addition it can also make CFG irreducible. Thus we disable it. if (!TM->requiresStructuredCFG()) - addPass(&TailDuplicateID); + addPass(&TailDuplicateLegacyID); // Copy propagation. addPass(&MachineCopyPropagationID); diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 25ab154a01d674..86c08cbdee5f32 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -701,16 +701,15 @@ Error ObjectLinkingLayer::handleRemoveResources(JITDylib &JD, ResourceKey K) { void ObjectLinkingLayer::handleTransferResources(JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) { - auto I = Allocs.find(SrcKey); - if (I != Allocs.end()) { - auto &SrcAllocs = I->second; + if (Allocs.contains(SrcKey)) { + // DstKey may not be in the DenseMap yet, so the following line may resize + // the container and invalidate iterators and value references. auto &DstAllocs = Allocs[DstKey]; + auto &SrcAllocs = Allocs[SrcKey]; DstAllocs.reserve(DstAllocs.size() + SrcAllocs.size()); for (auto &Alloc : SrcAllocs) DstAllocs.push_back(std::move(Alloc)); - // Erase SrcKey entry using value rather than iterator I: I may have been - // invalidated when we looked up DstKey. Allocs.erase(SrcKey); } diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index bc3433d011551c..a73b2310d193aa 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -430,16 +430,15 @@ Error RTDyldObjectLinkingLayer::handleRemoveResources(JITDylib &JD, void RTDyldObjectLinkingLayer::handleTransferResources(JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) { - auto I = MemMgrs.find(SrcKey); - if (I != MemMgrs.end()) { - auto &SrcMemMgrs = I->second; + if (MemMgrs.contains(SrcKey)) { + // DstKey may not be in the DenseMap yet, so the following line may resize + // the container and invalidate iterators and value references. auto &DstMemMgrs = MemMgrs[DstKey]; + auto &SrcMemMgrs = MemMgrs[SrcKey]; DstMemMgrs.reserve(DstMemMgrs.size() + SrcMemMgrs.size()); for (auto &MemMgr : SrcMemMgrs) DstMemMgrs.push_back(std::move(MemMgr)); - // Erase SrcKey entry using value rather than iterator I: I may have been - // invalidated when we looked up DstKey. MemMgrs.erase(SrcKey); } } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ee807ca13787d5..ffcab98db9aa02 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4121,8 +4121,9 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::getIndexedType(GEP.getSourceElementType(), Idxs); Check(ElTy, "Invalid indices for GEP pointer type!", &GEP); - Check(GEP.getType()->isPtrOrPtrVectorTy() && - GEP.getResultElementType() == ElTy, + PointerType *PtrTy = dyn_cast(GEP.getType()->getScalarType()); + + Check(PtrTy && GEP.getResultElementType() == ElTy, "GEP is not of right type for indices!", &GEP, ElTy); if (auto *GEPVTy = dyn_cast(GEP.getType())) { @@ -4144,10 +4145,8 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - if (auto *PTy = dyn_cast(GEP.getType())) { - Check(GEP.getAddressSpace() == PTy->getAddressSpace(), - "GEP address space doesn't match type", &GEP); - } + Check(GEP.getAddressSpace() == PtrTy->getAddressSpace(), + "GEP address space doesn't match type", &GEP); visitInstruction(GEP); } diff --git a/llvm/lib/MC/MCInstPrinter.cpp b/llvm/lib/MC/MCInstPrinter.cpp index e4faeba04a8fd7..069716a3ecf9b7 100644 --- a/llvm/lib/MC/MCInstPrinter.cpp +++ b/llvm/lib/MC/MCInstPrinter.cpp @@ -43,7 +43,7 @@ StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void MCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { llvm_unreachable("Target should implement this"); } @@ -224,29 +224,32 @@ format_object MCInstPrinter::formatHex(uint64_t Value) const { llvm_unreachable("unsupported print style"); } -MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, - Markup S) const { - return WithMarkup(OS, S, getUseMarkup(), getUseColor()); +MCInstPrinter::WithMarkup MCInstPrinter::markup(raw_ostream &OS, Markup S) { + return WithMarkup(*this, OS, S, getUseMarkup(), getUseColor()); } -MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M, - bool EnableMarkup, bool EnableColor) - : OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) { +MCInstPrinter::WithMarkup::WithMarkup(MCInstPrinter &IP, raw_ostream &OS, + Markup M, bool EnableMarkup, + bool EnableColor) + : IP(IP), OS(OS), EnableMarkup(EnableMarkup), EnableColor(EnableColor) { if (EnableColor) { + raw_ostream::Colors Color = raw_ostream::Colors::RESET; switch (M) { case Markup::Immediate: - OS.changeColor(raw_ostream::RED); + Color = raw_ostream::RED; break; case Markup::Register: - OS.changeColor(raw_ostream::CYAN); + Color = raw_ostream::CYAN; break; case Markup::Target: - OS.changeColor(raw_ostream::YELLOW); + Color = raw_ostream::YELLOW; break; case Markup::Memory: - OS.changeColor(raw_ostream::GREEN); + Color = raw_ostream::GREEN; break; } + IP.ColorStack.push_back(Color); + OS.changeColor(Color); } if (EnableMarkup) { @@ -270,6 +273,8 @@ MCInstPrinter::WithMarkup::WithMarkup(raw_ostream &OS, Markup M, MCInstPrinter::WithMarkup::~WithMarkup() { if (EnableMarkup) OS << '>'; - if (EnableColor) - OS.resetColor(); + if (!EnableColor) + return; + IP.ColorStack.pop_back(); + OS << IP.ColorStack.back(); } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 4774e5112af535..ecccb228c8c387 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -264,7 +264,7 @@ class AsmParser : public MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, - const MCInstrInfo *MII, const MCInstPrinter *IP, + const MCInstrInfo *MII, MCInstPrinter *IP, MCAsmParserSemaCallback &SI) override; bool parseExpression(const MCExpr *&Res); @@ -6006,7 +6006,7 @@ bool AsmParser::parseMSInlineAsm( SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; SmallVector InputDeclsAddressOf; diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index d88fd09a1aa07c..a7f37d81f64092 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -539,7 +539,7 @@ class MasmParser : public MCAsmParser { SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, - const MCInstrInfo *MII, const MCInstPrinter *IP, + const MCInstrInfo *MII, MCInstPrinter *IP, MCAsmParserSemaCallback &SI) override; bool parseExpression(const MCExpr *&Res); @@ -7340,7 +7340,7 @@ bool MasmParser::parseMSInlineAsm( SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, - const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { + MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { SmallVector InputDecls; SmallVector OutputDecls; SmallVector InputDeclsAddressOf; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d1f75dfb5350a0..a879918005cad8 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -125,6 +125,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/StackColoring.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TailDuplication.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TwoAddressInstructionPass.h" #include "llvm/CodeGen/TypePromotion.h" diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 1a3721bf103503..0ab9f942a08589 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" @@ -28,6 +29,7 @@ #include "llvm/Support/OnDiskHashTable.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -184,13 +186,25 @@ class InstrProfRecordWriterTrait { InstrProfWriter::InstrProfWriter( bool Sparse, uint64_t TemporalProfTraceReservoirSize, uint64_t MaxTemporalProfTraceLength, bool WritePrevVersion, - memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema) + memprof::IndexedVersion MemProfVersionRequested, bool MemProfFullSchema, + bool MemprofGenerateRandomHotness, + unsigned MemprofGenerateRandomHotnessSeed) : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength), TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize), InfoObj(new InstrProfRecordWriterTrait()), WritePrevVersion(WritePrevVersion), MemProfVersionRequested(MemProfVersionRequested), - MemProfFullSchema(MemProfFullSchema) {} + MemProfFullSchema(MemProfFullSchema), + MemprofGenerateRandomHotness(MemprofGenerateRandomHotness) { + // Set up the random number seed if requested. + if (MemprofGenerateRandomHotness) { + unsigned seed = MemprofGenerateRandomHotnessSeed + ? MemprofGenerateRandomHotnessSeed + : std::time(nullptr); + errs() << "random hotness seed = " << seed << "\n"; + std::srand(seed); + } +} InstrProfWriter::~InstrProfWriter() { delete InfoObj; } @@ -273,13 +287,34 @@ void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, void InstrProfWriter::addMemProfRecord( const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { - auto [Iter, Inserted] = MemProfData.Records.insert({Id, Record}); + auto NewRecord = Record; + // Provoke random hotness values if requested. We specify the lifetime access + // density and lifetime length that will result in a cold or not cold hotness. + // See the logic in getAllocType() in Analysis/MemoryProfileInfo.cpp. + if (MemprofGenerateRandomHotness) { + for (auto &Alloc : NewRecord.AllocSites) { + // To get a not cold context, set the lifetime access density to the + // maximum value and the lifetime to 0. + uint64_t NewTLAD = std::numeric_limits::max(); + uint64_t NewTL = 0; + bool IsCold = std::rand() % 2; + if (IsCold) { + // To get a cold context, set the lifetime access density to 0 and the + // lifetime to the maximum value. + NewTLAD = 0; + NewTL = std::numeric_limits::max(); + } + Alloc.Info.setTotalLifetimeAccessDensity(NewTLAD); + Alloc.Info.setTotalLifetime(NewTL); + } + } + auto [Iter, Inserted] = MemProfData.Records.insert({Id, NewRecord}); // If we inserted a new record then we are done. if (Inserted) { return; } memprof::IndexedMemProfRecord &Existing = Iter->second; - Existing.merge(Record); + Existing.merge(NewRecord); } bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, diff --git a/llvm/lib/SandboxIR/Context.cpp b/llvm/lib/SandboxIR/Context.cpp index 5e5cbbbc4515d2..b86ed5864c1ac1 100644 --- a/llvm/lib/SandboxIR/Context.cpp +++ b/llvm/lib/SandboxIR/Context.cpp @@ -681,7 +681,7 @@ void Context::runMoveInstrCallbacks(Instruction *I, const BBIterator &WhereIt) { // An arbitrary limit, to check for accidental misuse. We expect a small number // of callbacks to be registered at a time, but we can increase this number if // we discover we needed more. -static constexpr int MaxRegisteredCallbacks = 16; +[[maybe_unused]] static constexpr int MaxRegisteredCallbacks = 16; Context::CallbackID Context::registerEraseInstrCallback(EraseInstrCallback CB) { assert(EraseInstrCallbacks.size() <= MaxRegisteredCallbacks && diff --git a/llvm/lib/Support/TrieRawHashMap.cpp b/llvm/lib/Support/TrieRawHashMap.cpp index 9eeac0bbc5c2c3..11d79a62d011dd 100644 --- a/llvm/lib/Support/TrieRawHashMap.cpp +++ b/llvm/lib/Support/TrieRawHashMap.cpp @@ -79,7 +79,7 @@ class TrieSubtrie final static constexpr size_t sizeToAlloc(unsigned NumBits) { assert(NumBits < 20 && "Tries should have fewer than ~1M slots"); - size_t Count = 1u << NumBits; + unsigned Count = 1u << NumBits; return totalSizeToAlloc>(Count); } @@ -424,7 +424,7 @@ unsigned ThreadSafeTrieRawHashMapBase::getNumSlotUsed( return 0; unsigned Num = 0; for (unsigned I = 0, E = S->size(); I < E; ++I) - if (auto *E = S->load(I)) + if (S->load(I)) ++Num; return Num; } diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 8fe7f69ecf8e59..1e93b2c160ba58 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -89,7 +89,7 @@ TGLexer::TGLexer(SourceMgr &SM, ArrayRef Macros) : SrcMgr(SM) { for (StringRef MacroName : Macros) { const char *End = lexMacroName(MacroName); if (End != MacroName.end()) - PrintFatalError("Invalid macro name `" + MacroName + + PrintFatalError("invalid macro name `" + MacroName + "` specified on command line"); DefinedMacros.insert(MacroName); @@ -188,7 +188,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { return LexIdentifier(); // Unknown character, emit an error. - return ReturnError(TokStart, "Unexpected character"); + return ReturnError(TokStart, "unexpected character"); case EOF: // Lex next token, if we just left an include file. // Note that leaving an include file means that the next @@ -231,7 +231,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { ++CurPtr; // Eat third dot. return tgtok::dotdotdot; } - return ReturnError(TokStart, "Invalid '..' punctuation"); + return ReturnError(TokStart, "invalid '..' punctuation"); } return tgtok::dot; @@ -255,7 +255,7 @@ tgtok::TokKind TGLexer::LexToken(bool FileOrLineStart) { if (SkipCComment()) return tgtok::Error; } else // Otherwise, this is an error. - return ReturnError(TokStart, "Unexpected character"); + return ReturnError(TokStart, "unexpected character"); return LexToken(FileOrLineStart); case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': @@ -313,10 +313,10 @@ tgtok::TokKind TGLexer::LexString() { while (*CurPtr != '"') { // If we hit the end of the buffer, report an error. if (*CurPtr == 0 && CurPtr == CurBuf.end()) - return ReturnError(StrStart, "End of file in string literal"); + return ReturnError(StrStart, "end of file in string literal"); if (*CurPtr == '\n' || *CurPtr == '\r') - return ReturnError(StrStart, "End of line in string literal"); + return ReturnError(StrStart, "end of line in string literal"); if (*CurPtr != '\\') { CurStrVal += *CurPtr++; @@ -346,7 +346,7 @@ tgtok::TokKind TGLexer::LexString() { // If we hit the end of the buffer, report an error. case '\0': if (CurPtr == CurBuf.end()) - return ReturnError(StrStart, "End of file in string literal"); + return ReturnError(StrStart, "end of file in string literal"); [[fallthrough]]; default: return ReturnError(CurPtr, "invalid escape in string literal"); @@ -359,7 +359,7 @@ tgtok::TokKind TGLexer::LexString() { tgtok::TokKind TGLexer::LexVarName() { if (!isValidIDChar(CurPtr[0], /*First=*/true)) - return ReturnError(TokStart, "Invalid variable name"); + return ReturnError(TokStart, "invalid variable name"); // Otherwise, we're ok, consume the rest of the characters. const char *VarNameStart = CurPtr++; @@ -433,7 +433,7 @@ bool TGLexer::LexInclude() { tgtok::TokKind Tok = LexToken(); if (Tok == tgtok::Error) return true; if (Tok != tgtok::StrVal) { - PrintError(getLoc(), "Expected filename after include"); + PrintError(getLoc(), "expected filename after include"); return true; } @@ -444,7 +444,7 @@ bool TGLexer::LexInclude() { CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), IncludedFile); if (!CurBuffer) { - PrintError(getLoc(), "Could not find include file '" + Filename + "'"); + PrintError(getLoc(), "could not find include file '" + Filename + "'"); return true; } @@ -476,7 +476,7 @@ bool TGLexer::SkipCComment() { int CurChar = getNextChar(); switch (CurChar) { case EOF: - PrintError(TokStart, "Unterminated comment!"); + PrintError(TokStart, "unterminated comment"); return true; case '*': // End of the comment? @@ -543,7 +543,7 @@ tgtok::TokKind TGLexer::LexNumber() { // Requires at least one digit. if (CurPtr == NumStart) - return ReturnError(TokStart, "Invalid number"); + return ReturnError(TokStart, "invalid number"); errno = 0; if (IsMinus) @@ -552,9 +552,9 @@ tgtok::TokKind TGLexer::LexNumber() { CurIntVal = strtoull(NumStart, nullptr, Base); if (errno == EINVAL) - return ReturnError(TokStart, "Invalid number"); + return ReturnError(TokStart, "invalid number"); if (errno == ERANGE) - return ReturnError(TokStart, "Number out of range"); + return ReturnError(TokStart, "number out of range"); return Base == 2 ? tgtok::BinaryIntVal : tgtok::IntVal; } @@ -580,13 +580,13 @@ tgtok::TokKind TGLexer::LexBracket() { } } - return ReturnError(CodeStart - 2, "Unterminated code block"); + return ReturnError(CodeStart - 2, "unterminated code block"); } /// LexExclaim - Lex '!' and '![a-zA-Z]+'. tgtok::TokKind TGLexer::LexExclaim() { if (!isAlpha(*CurPtr)) - return ReturnError(CurPtr - 1, "Invalid \"!operator\""); + return ReturnError(CurPtr - 1, "invalid \"!operator\""); const char *Start = CurPtr++; while (isAlpha(*CurPtr)) @@ -648,7 +648,8 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("repr", tgtok::XRepr) .Default(tgtok::Error); - return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); + return Kind != tgtok::Error ? Kind + : ReturnError(Start - 1, "unknown operator"); } bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { @@ -662,17 +663,17 @@ bool TGLexer::prepExitInclude(bool IncludeStackMustBeEmpty) { // Pop the preprocessing controls from the include stack. if (PrepIncludeStack.empty()) { - PrintFatalError("Preprocessor include stack is empty"); + PrintFatalError("preprocessor include stack is empty"); } PrepIncludeStack.pop_back(); if (IncludeStackMustBeEmpty) { if (!PrepIncludeStack.empty()) - PrintFatalError("Preprocessor include stack is not empty"); + PrintFatalError("preprocessor include stack is not empty"); } else { if (PrepIncludeStack.empty()) - PrintFatalError("Preprocessor include stack is empty"); + PrintFatalError("preprocessor include stack is empty"); } return true; @@ -732,7 +733,7 @@ bool TGLexer::prepEatPreprocessorDirective(tgtok::TokKind Kind) { return true; } - PrintFatalError("Unsupported preprocessing token in " + PrintFatalError("unsupported preprocessing token in " "prepEatPreprocessorDirective()"); return false; } @@ -748,7 +749,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, StringRef MacroName = prepLexMacroName(); StringRef IfTokName = Kind == tgtok::Ifdef ? "#ifdef" : "#ifndef"; if (MacroName.empty()) - return ReturnError(TokStart, "Expected macro name after " + IfTokName); + return ReturnError(TokStart, "expected macro name after " + IfTokName); bool MacroIsDefined = DefinedMacros.count(MacroName) != 0; @@ -763,7 +764,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, {tgtok::Ifdef, MacroIsDefined, SMLoc::getFromPointer(TokStart)}); if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after " + + return ReturnError(CurPtr, "only comments are supported after " + IfTokName + " NAME"); // If we were not processing tokens before this #ifdef, @@ -794,7 +795,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, if (IfdefEntry.Kind != tgtok::Ifdef) { PrintError(TokStart, "double #else"); - return ReturnError(IfdefEntry.SrcPos, "Previous #else is here"); + return ReturnError(IfdefEntry.SrcPos, "previous #else is here"); } // Replace the corresponding #ifdef's control with its negation @@ -804,7 +805,7 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, {Kind, !IfdefEntry.IsDefined, SMLoc::getFromPointer(TokStart)}); if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after #else"); + return ReturnError(CurPtr, "only comments are supported after #else"); // If we were processing tokens before this #else, // we have to start skipping lines until the matching #endif. @@ -827,12 +828,12 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, if (IfdefOrElseEntry.Kind != tgtok::Ifdef && IfdefOrElseEntry.Kind != tgtok::Else) { - PrintFatalError("Invalid preprocessor control on the stack"); + PrintFatalError("invalid preprocessor control on the stack"); return tgtok::Error; } if (!prepSkipDirectiveEnd()) - return ReturnError(CurPtr, "Only comments are supported after #endif"); + return ReturnError(CurPtr, "only comments are supported after #endif"); PrepIncludeStack.back()->pop_back(); @@ -847,15 +848,15 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, } else if (Kind == tgtok::Define) { StringRef MacroName = prepLexMacroName(); if (MacroName.empty()) - return ReturnError(TokStart, "Expected macro name after #define"); + return ReturnError(TokStart, "expected macro name after #define"); if (!DefinedMacros.insert(MacroName).second) PrintWarning(getLoc(), - "Duplicate definition of macro: " + Twine(MacroName)); + "duplicate definition of macro: " + Twine(MacroName)); if (!prepSkipDirectiveEnd()) return ReturnError(CurPtr, - "Only comments are supported after #define NAME"); + "only comments are supported after #define NAME"); if (!ReturnNextLiveToken) { PrintFatalError("#define must be ignored during the lines skipping"); @@ -865,13 +866,13 @@ tgtok::TokKind TGLexer::lexPreprocessor(tgtok::TokKind Kind, return LexToken(); } - PrintFatalError("Preprocessing directive is not supported"); + PrintFatalError("preprocessing directive is not supported"); return tgtok::Error; } bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { if (!MustNeverBeFalse) - PrintFatalError("Invalid recursion."); + PrintFatalError("invalid recursion."); do { // Skip all symbols to the line end. @@ -917,7 +918,7 @@ bool TGLexer::prepSkipRegion(bool MustNeverBeFalse) { // due to #else or #endif. if (prepIsProcessingEnabled()) { if (Kind != tgtok::Else && Kind != tgtok::Endif) { - PrintFatalError("Tokens processing was enabled by an unexpected " + PrintFatalError("tokens processing was enabled by an unexpected " "preprocessing directive"); return false; } @@ -1032,7 +1033,7 @@ bool TGLexer::prepSkipDirectiveEnd() { return false; } else { TokStart = CurPtr; - PrintError(CurPtr, "Unexpected character"); + PrintError(CurPtr, "unexpected character"); return false; } @@ -1067,8 +1068,8 @@ void TGLexer::prepReportPreprocessorStackError() { "empty control stack"); auto &PrepControl = PrepIncludeStack.back()->back(); - PrintError(CurBuf.end(), "Reached EOF without matching #endif"); - PrintError(PrepControl.SrcPos, "The latest preprocessor control is here"); + PrintError(CurBuf.end(), "reached EOF without matching #endif"); + PrintError(PrepControl.SrcPos, "the latest preprocessor control is here"); TokStart = CurPtr; } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 9bb508b783c36a..6854cccaafa1d7 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -73,7 +73,8 @@ def SVEUnsupported : AArch64Unsupported { SVE2Unsupported.F); } -let F = [HasSME2p2, HasSVE2p2orSME2p2] in +let F = [HasSME2p2, HasSVE2p2orSME2p2, HasNonStreamingSVEorSME2p2, + HasNonStreamingSVE2p2orSME2p2] in def SME2p2Unsupported : AArch64Unsupported; def SME2p1Unsupported : AArch64Unsupported { diff --git a/llvm/lib/Target/AArch64/AArch64FMV.td b/llvm/lib/Target/AArch64/AArch64FMV.td index 7146b041fe5d15..12d841445b80f7 100644 --- a/llvm/lib/Target/AArch64/AArch64FMV.td +++ b/llvm/lib/Target/AArch64/AArch64FMV.td @@ -81,9 +81,6 @@ def : FMVExtension<"sme-i16i64", "FEAT_SME_I64", "+sme,+sme-i16i64,+bf16", 570>; def : FMVExtension<"sme2", "FEAT_SME2", "+sme2,+sme,+bf16", 580>; def : FMVExtension<"ssbs", "FEAT_SSBS2", "+ssbs", 490>; def : FMVExtension<"sve", "FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>; -def : FMVExtension<"sve-bf16", "FEAT_SVE_BF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 320>; -def : FMVExtension<"sve-ebf16", "FEAT_SVE_EBF16", "+sve,+bf16,+fullfp16,+fp-armv8,+neon", 330>; -def : FMVExtension<"sve-i8mm", "FEAT_SVE_I8MM", "+sve,+i8mm,+fullfp16,+fp-armv8,+neon", 340>; def : FMVExtension<"sve2", "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>; def : FMVExtension<"sve2-aes", "FEAT_SVE_PMULL128", "+sve2,+sve,+aes,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>; def : FMVExtension<"sve2-bitperm", "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 32ba2866ac8180..31a720ed7b5c77 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2536,6 +2536,11 @@ unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode( case AArch64ISD::FCMLTz: // Compares return either 0 or all-ones return VTBits; + case AArch64ISD::VASHR: { + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + return std::min(Tmp + Op.getConstantOperandVal(1), VTBits); + } } return 1; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 6194de2d56b630..457e918728ae27 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -244,7 +244,7 @@ def HasSVEorSME : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME), "sve or sme">; -def HasSVEorSME2p2 +def HasNonStreamingSVEorSME2p2 : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||" "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2), @@ -281,6 +281,11 @@ def HasSMEF16F16orSMEF8F16 : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">, AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16), "sme-f16f16 or sme-f8f16">; +def HasNonStreamingSVE2p2orSME2p2 + : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p2()) ||" + "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">, + AssemblerPredicateWithAll<(any_of FeatureSVE2p2, FeatureSME2p2), + "sme2p2 or sve2p2">; // A subset of NEON instructions are legal in Streaming SVE execution mode, // so don't need the additional check for 'isNeonAvailable'. diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index 8516ab2c7dd71c..4117d74d10c1e7 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -19,18 +19,24 @@ class AArch64Reg enc, string n, list subregs = [], } let Namespace = "AArch64" in { + // SubRegIndexes for GPR registers def sub_32 : SubRegIndex<32>; + def sube64 : SubRegIndex<64>; + def subo64 : SubRegIndex<64>; + def sube32 : SubRegIndex<32>; + def subo32 : SubRegIndex<32>; + // SubRegIndexes for FPR/Vector registers def bsub : SubRegIndex<8>; def hsub : SubRegIndex<16>; def ssub : SubRegIndex<32>; def dsub : SubRegIndex<64>; - def sube32 : SubRegIndex<32>; - def subo32 : SubRegIndex<32>; - def sube64 : SubRegIndex<64>; - def subo64 : SubRegIndex<64>; - // SVE - def zsub : SubRegIndex<128>; + def zsub : SubRegIndex<128>; + // Note: Code depends on these having consecutive numbers + def zsub0 : SubRegIndex<128, -1>; + def zsub1 : SubRegIndex<128, -1>; + def zsub2 : SubRegIndex<128, -1>; + def zsub3 : SubRegIndex<128, -1>; // Note: Code depends on these having consecutive numbers def dsub0 : SubRegIndex<64>; def dsub1 : SubRegIndex<64>; @@ -41,7 +47,8 @@ let Namespace = "AArch64" in { def qsub1 : SubRegIndex<128>; def qsub2 : SubRegIndex<128>; def qsub3 : SubRegIndex<128>; - // Note: Code depends on these having consecutive numbers + + // SubRegIndexes for SME Matrix tiles def zasubb : SubRegIndex<2048>; // (16 x 16)/1 bytes = 2048 bits def zasubh0 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits def zasubh1 : SubRegIndex<1024>; // (16 x 16)/2 bytes = 1024 bits @@ -52,7 +59,11 @@ let Namespace = "AArch64" in { def zasubq0 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits def zasubq1 : SubRegIndex<128>; // (16 x 16)/16 bytes = 128 bits - def psub : SubRegIndex<16>; + // SubRegIndexes for SVE Predicates + def psub : SubRegIndex<16>; + // Note: Code depends on these having consecutive numbers + def psub0 : SubRegIndex<16, -1>; + def psub1 : SubRegIndex<16, -1>; } let Namespace = "AArch64" in { @@ -1026,11 +1037,6 @@ def PNR16_p8to15 : PNRP8to15RegOp<"h", PNRAsmOp16_p8to15, 16, PNR_p8to15>; def PNR32_p8to15 : PNRP8to15RegOp<"s", PNRAsmOp32_p8to15, 32, PNR_p8to15>; def PNR64_p8to15 : PNRP8to15RegOp<"d", PNRAsmOp64_p8to15, 64, PNR_p8to15>; -let Namespace = "AArch64" in { - def psub0 : SubRegIndex<16, -1>; - def psub1 : SubRegIndex<16, -1>; -} - class PPRorPNRClass : RegisterClass< "AArch64", [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, @@ -1123,8 +1129,7 @@ let EncoderMethod = "EncodeRegMul_MinMax<2, 0, 14>", } // end let EncoderMethod/DecoderMethod -//****************************************************************************** - +//===----------------------------------------------------------------------===// // SVE vector register classes class ZPRClass : RegisterClass<"AArch64", [nxv16i8, nxv8i16, nxv4i32, nxv2i64, @@ -1245,13 +1250,6 @@ def FPR32asZPR : FPRasZPROperand<32>; def FPR64asZPR : FPRasZPROperand<64>; def FPR128asZPR : FPRasZPROperand<128>; -let Namespace = "AArch64" in { - def zsub0 : SubRegIndex<128, -1>; - def zsub1 : SubRegIndex<128, -1>; - def zsub2 : SubRegIndex<128, -1>; - def zsub3 : SubRegIndex<128, -1>; -} - // Pairs, triples, and quads of SVE vector registers. def ZSeqPairs : RegisterTuples<[zsub0, zsub1], [(rotl ZPR, 0), (rotl ZPR, 1)]>; def ZSeqTriples : RegisterTuples<[zsub0, zsub1, zsub2], [(rotl ZPR, 0), (rotl ZPR, 1), (rotl ZPR, 2)]>; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index b7165294288946..7357aa3c1f0d55 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -1017,6 +1017,12 @@ let Predicates = [HasSME2p2] in { defm FMUL_2Z2Z : sme2_multi2_fmul_mm< "fmul">; defm FMUL_4ZZ : sme2_multi4_fmul_sm<"fmul">; defm FMUL_4Z4Z : sme2_multi4_fmul_mm< "fmul">; + + defm FMOP4A : sme2_fmop4as_fp32_non_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp32_non_widening<1, "fmop4s">; + + defm FMOP4A : sme2_fmop4as_fp16_fp32_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp16_fp32_widening<1, "fmop4s">; } // [HasSME2p2] let Predicates = [HasSME2p2, HasSMEB16B16] in { @@ -1047,3 +1053,13 @@ let Predicates = [HasSME2p2, HasSMEF8F32] in { defm FMOP4A : sme2_fmop4a_fp8_fp32_4way<"fmop4a">; } } + +let Predicates = [HasSME2p2, HasSMEB16B16] in { + defm BFMOP4A : sme2_bfmop4as_non_widening<0, "bfmop4a">; + defm BFMOP4S : sme2_bfmop4as_non_widening<1, "bfmop4s">; +} + +let Predicates = [HasSME2p2, HasSMEF64F64] in { + defm FMOP4A : sme2_fmop4as_fp64_non_widening<0, "fmop4a">; + defm FMOP4S : sme2_fmop4as_fp64_non_widening<1, "fmop4s">; +} diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 5c5ae898a8ac02..d6662d15617fab 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -928,9 +928,10 @@ let Predicates = [HasSVEorSME] in { defm SPLICE_ZPZ : sve_int_perm_splice<"splice", AArch64splice>; } // End HasSVEorSME -let Predicates = [HasSVE] in { - defm COMPACT_ZPZ : sve_int_perm_compact<"compact", int_aarch64_sve_compact>; -} // End HasSVE +// COMPACT - word and doubleword +let Predicates = [HasNonStreamingSVEorSME2p2] in { + defm COMPACT_ZPZ : sve_int_perm_compact_sd<"compact", int_aarch64_sve_compact>; +} let Predicates = [HasSVEorSME] in { defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>; @@ -2128,7 +2129,7 @@ let Predicates = [HasSVEorSME] in { defm CNTH_XPiI : sve_int_count<0b010, "cnth", int_aarch64_sve_cnth>; defm CNTW_XPiI : sve_int_count<0b100, "cntw", int_aarch64_sve_cntw>; defm CNTD_XPiI : sve_int_count<0b110, "cntd", int_aarch64_sve_cntd>; - defm CNTP_XPP : sve_int_pcount_pred<0b0000, "cntp", int_aarch64_sve_cntp>; + defm CNTP_XPP : sve_int_pcount_pred<0b000, "cntp", int_aarch64_sve_cntp>; def : Pat<(i64 (AArch64CttzElts nxv16i1:$Op1)), (CNTP_XPP_B (BRKB_PPzP (PTRUE_B 31), PPR:$Op1), @@ -3850,7 +3851,7 @@ let Predicates = [HasSVE2] in { let Predicates = [HasSVE2orSME] in { // SVE2 vector splice (constructive) - defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">; + defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice", AArch64splice>; } // End HasSVE2orSME let Predicates = [HasSVE2] in { @@ -4303,8 +4304,21 @@ let Predicates = [HasSVE2p2orSME2p2] in { def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>; def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>; + // SVE predicate count + defm FIRSTP_XPP : sve_int_pcount_pred_tmp<0b001, "firstp">; + defm LASTP_XPP : sve_int_pcount_pred_tmp<0b010, "lastp">; } // End HasSME2p2orSVE2p2 +//===----------------------------------------------------------------------===// +// SME2.2 or SVE2.2 instructions - Legal in streaming mode iff target has SME2p2 +//===----------------------------------------------------------------------===// +let Predicates = [HasNonStreamingSVE2p2orSME2p2] in { + // SVE2 EXPAND + defm EXPAND_ZPZ : sve2_int_perm_expand<"expand">; + // SVE COMPACT - byte and halfword + defm COMPACT_ZPZ : sve_int_perm_compact_bh<"compact">; +} + //===----------------------------------------------------------------------===// // SVE2 FP8 instructions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index 7fb2a961e0313d..736d57e6ae2fd9 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -86,6 +86,13 @@ static cl::alias AArch64StreamingStackHazardSize( cl::desc("alias for -aarch64-streaming-hazard-size"), cl::aliasopt(AArch64StreamingHazardSize)); +// Subreg liveness tracking is disabled by default for now until all issues +// are ironed out. This option allows the feature to be used in tests. +static cl::opt + EnableSubregLivenessTracking("aarch64-enable-subreg-liveness-tracking", + cl::init(false), cl::Hidden, + cl::desc("Enable subreg liveness tracking")); + unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const { if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0) return OverrideVectorInsertExtractBaseCost; @@ -380,6 +387,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU, ReserveXRegisterForRA.set(29); AddressCheckPSV.reset(new AddressCheckPseudoSourceValue(TM)); + + EnableSubregLiveness = EnableSubregLivenessTracking.getValue(); } const CallLowering *AArch64Subtarget::getCallLowering() const { diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 50adb7cbf69a87..f3dcce3f3994ba 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -90,6 +90,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { unsigned VScaleForTuning = 2; TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled; + bool EnableSubregLiveness; + /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; @@ -153,6 +155,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } + bool enableSubRegLiveness() const override { return EnableSubregLiveness; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp index 762a7af8c3ddb3..2ee2ee5a6fa500 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -59,12 +59,12 @@ bool AArch64InstPrinter::applyTargetSpecificCLOption(StringRef Opt) { return false; } -void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg); } void AArch64InstPrinter::printRegName(raw_ostream &OS, MCRegister Reg, - unsigned AltIdx) const { + unsigned AltIdx) { markup(OS, Markup::Register) << getRegisterName(Reg, AltIdx); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h index e7b62b3203681b..9cf2674ae943aa 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -29,8 +29,8 @@ class AArch64InstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &OS, MCRegister Reg) const override; - void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const; + void printRegName(raw_ostream &OS, MCRegister Reg) override; + void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx); // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index e7c90b0ed14e06..867901ac5d9035 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -5417,3 +5417,151 @@ multiclass sme2_fmop4a_fp8_fp32_4way { // Multiple vectors def _M2Z2Z_BtoS : sme2_fp8_fp32_quarter_tile_outer_product<1, 1, mnemonic, ZZ_b_mul_r_Lo, ZZ_b_mul_r_Hi>; } + +class sme2_bf16_fp16_quarter_tile_outer_product + : I<(outs TileOp16:$ZAda), + (ins TileOp16:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bit ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001001; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-1} = 0b100; + let Inst{0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_bfmop4as_non_widening { + // Single vectors + def _MZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_H : sme2_bf16_fp16_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_H : sme2_bf16_fp16_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} + +class sme2_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000000000; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp32_non_widening { + // Single vectors + def _MZZ_S : sme2_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR32Mul2_Lo, ZPR32Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_S : sme2_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZPR32Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_S : sme2_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR32Mul2_Lo, ZZ_s_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_S : sme2_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_s_mul_r_Lo, ZZ_s_mul_r_Hi>; +} + +class sme2_fp64_quarter_tile_outer_product + : I<(outs TileOp64:$ZAda), + (ins TileOp64:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000000110; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3} = 0b1; + let Inst{2-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp64_non_widening { + // Single vectors + def _MZZ_D : sme2_fp64_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR64Mul2_Lo, ZPR64Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_D : sme2_fp64_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZPR64Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_D : sme2_fp64_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR64Mul2_Lo, ZZ_d_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_D : sme2_fp64_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_d_mul_r_Lo, ZZ_d_mul_r_Hi>; +} + +class sme2_fp16_fp32_quarter_tile_outer_product + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, zn_ty:$Zn, zm_ty:$Zm), + mnemonic, "\t$ZAda, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<3> Zn; + bits<3> Zm; + + let Inst{31-21} = 0b10000001001; + let Inst{20} = M; + let Inst{19-17} = Zm; + let Inst{16-10} = 0b0000000; + let Inst{9} = N; + let Inst{8-6} = Zn; + let Inst{5} = 0; + let Inst{4} = S; + let Inst{3-2} = 0b00; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmop4as_fp16_fp32_widening { + // Single vectors + def _MZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 0, S, mnemonic, ZPR16Mul2_Lo, ZPR16Mul2_Hi>; + + // Multiple and single vectors + def _M2ZZ_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<0, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZPR16Mul2_Hi>; + + // Single and multiple vectors + def _MZ2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 0, S, mnemonic, ZPR16Mul2_Lo, ZZ_h_mul_r_Hi>; + + // Multiple vectors + def _M2Z2Z_HtoS : sme2_fp16_fp32_quarter_tile_outer_product<1, 1, S, mnemonic, ZZ_h_mul_r_Lo, ZZ_h_mul_r_Hi>; +} diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 88a0983aa1480d..552d5b9b23a7e4 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1046,7 +1046,7 @@ multiclass sve_int_count_v opc, string asm, (!cast(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>; } -class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, +class sve_int_pcount_pred sz8_64, bits<3> opc, string asm, PPRRegOp pprty> : I<(outs GPR64:$Rd), (ins PPRAny:$Pg, pprty:$Pn), asm, "\t$Rd, $Pg, $Pn", @@ -1058,17 +1058,17 @@ class sve_int_pcount_pred sz8_64, bits<4> opc, string asm, let Inst{31-24} = 0b00100101; let Inst{23-22} = sz8_64; let Inst{21-19} = 0b100; - let Inst{18-16} = opc{3-1}; + let Inst{18-16} = opc{2-0}; let Inst{15-14} = 0b10; let Inst{13-10} = Pg; - let Inst{9} = opc{0}; + let Inst{9} = 0b0; let Inst{8-5} = Pn; let Inst{4-0} = Rd; let hasSideEffects = 0; } -multiclass sve_int_pcount_pred opc, string asm, +multiclass sve_int_pcount_pred opc, string asm, SDPatternOperator int_op> { def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; @@ -1081,6 +1081,12 @@ multiclass sve_int_pcount_pred opc, string asm, def : SVE_2_Op_Pat(NAME # _D)>; } +multiclass sve_int_pcount_pred_tmp opc, string asm> { + def _B : sve_int_pcount_pred<0b00, opc, asm, PPR8>; + def _H : sve_int_pcount_pred<0b01, opc, asm, PPR16>; + def _S : sve_int_pcount_pred<0b10, opc, asm, PPR32>; + def _D : sve_int_pcount_pred<0b11, opc, asm, PPR64>; +} //===----------------------------------------------------------------------===// // SVE Element Count Group //===----------------------------------------------------------------------===// @@ -7308,11 +7314,59 @@ class sve2_int_perm_splice_cons sz8_64, string asm, let hasSideEffects = 0; } -multiclass sve2_int_perm_splice_cons { +multiclass sve2_int_perm_splice_cons { def _B : sve2_int_perm_splice_cons<0b00, asm, ZPR8, ZZ_b>; def _H : sve2_int_perm_splice_cons<0b01, asm, ZPR16, ZZ_h>; def _S : sve2_int_perm_splice_cons<0b10, asm, ZPR32, ZZ_s>; def _D : sve2_int_perm_splice_cons<0b11, asm, ZPR64, ZZ_d>; + + let AddedComplexity = 2 in { + foreach VT = [nxv16i8] in + def : Pat<(VT (op nxv16i1:$pred, VT:$zn1, VT:$zn2)), + (!cast(NAME # _B) + nxv16i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>; + + foreach VT = [nxv8i16, nxv8f16, nxv8bf16] in + def : Pat<(VT (op nxv8i1:$pred, VT:$zn1, VT:$zn2)), + (!cast(NAME # _H) + nxv8i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>; + + foreach VT = [nxv4i32, nxv4f16, nxv4f32, nxv4bf16] in + def : Pat<(VT (op nxv4i1:$pred, VT:$zn1, VT:$zn2)), + (!cast(NAME # _S) + nxv4i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>; + + foreach VT = [nxv2i64, nxv2f16, nxv2f32, nxv2f64, nxv2bf16] in + def : Pat<(VT (op nxv2i1:$pred, VT:$zn1, VT:$zn2)), + (!cast(NAME # _D) + nxv2i1:$pred, (REG_SEQUENCE ZPR2, VT:$zn1, zsub0, VT:$zn2, zsub1))>; + } +} + +class sve2_int_perm_expand sz, string asm, + ZPRRegOp zprty> +: I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), + asm, "\t$Zd, $Pg, $Zn", + "", + []>, Sched<[]> { + bits<3> Pg; + bits<5> Zn; + bits<5> Zd; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; + let Inst{21-13} = 0b110001100; + let Inst{12-10} = Pg; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; + + let hasSideEffects = 0; +} + +multiclass sve2_int_perm_expand { + def _B : sve2_int_perm_expand<0b00, asm, ZPR8>; + def _H : sve2_int_perm_expand<0b01, asm, ZPR16>; + def _S : sve2_int_perm_expand<0b10, asm, ZPR32>; + def _D : sve2_int_perm_expand<0b11, asm, ZPR64>; } class sve_int_perm_rev sz8_64, bits<2> opc, string asm, @@ -7476,7 +7530,7 @@ multiclass sve_int_perm_cpy_v { (!cast(NAME # _H) $passthru, $pg, $splat)>; } -class sve_int_perm_compact +class sve_int_perm_compact sz, string asm, ZPRRegOp zprty> : I<(outs zprty:$Zd), (ins PPR3bAny:$Pg, zprty:$Zn), asm, "\t$Zd, $Pg, $Zn", "", @@ -7484,8 +7538,8 @@ class sve_int_perm_compact bits<3> Pg; bits<5> Zd; bits<5> Zn; - let Inst{31-23} = 0b000001011; - let Inst{22} = sz; + let Inst{31-24} = 0b00000101; + let Inst{23-22} = sz; let Inst{21-13} = 0b100001100; let Inst{12-10} = Pg; let Inst{9-5} = Zn; @@ -7494,9 +7548,9 @@ class sve_int_perm_compact let hasSideEffects = 0; } -multiclass sve_int_perm_compact { - def _S : sve_int_perm_compact<0b0, asm, ZPR32>; - def _D : sve_int_perm_compact<0b1, asm, ZPR64>; +multiclass sve_int_perm_compact_sd { + def _S : sve_int_perm_compact<0b10, asm, ZPR32>; + def _D : sve_int_perm_compact<0b11, asm, ZPR64>; def : SVE_2_Op_Pat(NAME # _S)>; def : SVE_2_Op_Pat(NAME # _S)>; @@ -7504,6 +7558,11 @@ multiclass sve_int_perm_compact { def : SVE_2_Op_Pat(NAME # _D)>; } +multiclass sve_int_perm_compact_bh { + def _B : sve_int_perm_compact<0b00, asm, ZPR8>; + def _H : sve_int_perm_compact<0b01, asm, ZPR16>; +} + //===----------------------------------------------------------------------===// // SVE Memory - Contiguous Load Group //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index 687a7339da379d..6a69b9d2bfc716 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -358,7 +358,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize { const auto *CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - if (!CallerInfo) + if (!CallerInfo || !CallerInfo->isValidState()) return false; Change = Change | clampStateAndIndicateChange(this->getState(), @@ -449,7 +449,8 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { // Check for Intrinsics and propagate attributes. const AACallEdges *AAEdges = A.getAAFor( *this, this->getIRPosition(), DepClassTy::REQUIRED); - if (!AAEdges || AAEdges->hasNonAsmUnknownCallee()) + if (!AAEdges || !AAEdges->isValidState() || + AAEdges->hasNonAsmUnknownCallee()) return indicatePessimisticFixpoint(); bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv()); @@ -465,7 +466,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { if (IID == Intrinsic::not_intrinsic) { const AAAMDAttributes *AAAMD = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - if (!AAAMD) + if (!AAAMD || !AAAMD->isValidState()) return indicatePessimisticFixpoint(); *this &= *AAAMD; continue; @@ -660,7 +661,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes { const auto *PointerInfoAA = A.getAAFor( *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED); - if (!PointerInfoAA) + if (!PointerInfoAA || !PointerInfoAA->getState().isValidState()) return false; return PointerInfoAA->forallInterferingAccesses( @@ -717,7 +718,7 @@ struct AAAMDSizeRangeAttribute const auto *CallerInfo = A.getAAFor( *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); - if (!CallerInfo) + if (!CallerInfo || !CallerInfo->isValidState()) return false; Change |= @@ -835,7 +836,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { auto &InfoCache = static_cast(A.getInfoCache()); if (const auto *AssumedGroupSize = A.getAAFor( - *this, IRPosition::function(*F), DepClassTy::REQUIRED)) { + *this, IRPosition::function(*F), DepClassTy::REQUIRED); + AssumedGroupSize->isValidState()) { unsigned Min, Max; std::tie(Min, Max) = InfoCache.getWavesPerEU( @@ -864,7 +866,8 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute { *this, IRPosition::function(*Caller), DepClassTy::REQUIRED); const auto *AssumedGroupSize = A.getAAFor( *this, IRPosition::function(*Func), DepClassTy::REQUIRED); - if (!CallerInfo || !AssumedGroupSize) + if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() || + !AssumedGroupSize->isValidState()) return false; unsigned Min, Max; @@ -982,7 +985,8 @@ struct AAAMDGPUNoAGPR // TODO: Handle callsite attributes const auto *CalleeInfo = A.getAAFor( *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); - return CalleeInfo && CalleeInfo->getAssumed(); + return CalleeInfo && CalleeInfo->isValidState() && + CalleeInfo->getAssumed(); }; bool UsedAssumedInformation = false; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 351e9f25e29cfc..ab62e530a18d0c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -230,13 +230,6 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler { return AddrReg.getReg(0); } - void assignValueToReg(Register ValVReg, Register PhysReg, - const CCValAssign &VA) override { - MIB.addUse(PhysReg, RegState::Implicit); - Register ExtReg = extendRegisterMin32(*this, ValVReg, VA); - MIRBuilder.buildCopy(PhysReg, ExtReg); - } - void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy, const MachinePointerInfo &MPO, const CCValAssign &VA) override { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index f8744d6a483cff..7dd7388376f474 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -1159,7 +1159,6 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (LoadInst *LI = dyn_cast(UseInst)) { if (LI->isVolatile()) return false; - continue; } @@ -1170,12 +1169,19 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // Reject if the stored value is not the pointer operand. if (SI->getPointerOperand() != Val) return false; - } else if (AtomicRMWInst *RMW = dyn_cast(UseInst)) { + continue; + } + + if (AtomicRMWInst *RMW = dyn_cast(UseInst)) { if (RMW->isVolatile()) return false; - } else if (AtomicCmpXchgInst *CAS = dyn_cast(UseInst)) { + continue; + } + + if (AtomicCmpXchgInst *CAS = dyn_cast(UseInst)) { if (CAS->isVolatile()) return false; + continue; } // Only promote a select if we know that the other select operand @@ -1186,6 +1192,7 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( // May need to rewrite constant operands. WorkList.push_back(ICmp); + continue; } // TODO: If we know the address is only observed through flat pointers, we @@ -1198,8 +1205,9 @@ bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes( if (isa(User) || isa(User)) return false; + // TODO: Handle vectors of pointers. if (!User->getType()->isPointerTy()) - continue; + return false; if (GetElementPtrInst *GEP = dyn_cast(UseInst)) { // Be conservative if an address could be computed outside the bounds of @@ -1504,6 +1512,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I, PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS); + assert(isa(V->getType())); + // FIXME: It doesn't really make sense to try to do this for all // instructions. V->mutateType(NewTy); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index dd8d93c3f0b72a..88caf8196b3c90 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -24,7 +24,7 @@ using namespace llvm; using namespace llvm::AMDGPU; -void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // FIXME: The current implementation of // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this // as an integer or we provide a name which represents a physical register. diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h index a72e0fe6ea769f..4729b8a6aa6f40 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h @@ -29,7 +29,7 @@ class AMDGPUInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; static void printRegOperand(MCRegister Reg, raw_ostream &O, diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 52ca38aca5c771..bddb6e822b81b7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3855,10 +3855,14 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned ArgIdx = 0; for (auto [Reg, Val] : RegsToPass) { - if (ArgIdx++ >= NumSpecialInputs && !Val->isDivergent() && - TRI->isSGPRPhysReg(Reg)) { - // Speculatively insert a readfirstlane in case this is a uniform value in - // a VGPR. + if (ArgIdx++ >= NumSpecialInputs && + (IsChainCallConv || !Val->isDivergent()) && TRI->isSGPRPhysReg(Reg)) { + // For chain calls, the inreg arguments are required to be + // uniform. Speculatively Insert a readfirstlane in case we cannot prove + // they are uniform. + // + // For other calls, if an inreg arguments is known to be uniform, + // speculatively insert a readfirstlane in case it is in a VGPR. // // FIXME: We need to execute this in a waterfall loop if it is a divergent // value, so let that continue to produce invalid code. @@ -3893,9 +3897,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getTargetGlobalAddress(GV, DL, MVT::i64)); } else { if (IsTailCall) { - assert(!Callee->isDivergent() && - "cannot tail call a divergent call target"); - // isEligibleForTailCallOptimization considered whether the call target is // divergent, but we may still end up with a uniform value in a VGPR. // Insert a readfirstlane just in case. diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index faa0b6d6c3f506..c8a46217190a1d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3553,19 +3553,6 @@ def : AMDGPUPat < (V_BFE_U32_e64 $src, (i32 0), $width) >; -// x << (bitwidth - y) >> (bitwidth - y) -def : AMDGPUPat < - (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, i32:$width)), - (sub 32, i32:$width)), - (V_BFE_U32_e64 $src, (i32 0), $width) ->; - -def : AMDGPUPat < - (DivergentBinFrag (shl_oneuse i32:$src, (sub 32, i32:$width)), - (sub 32, i32:$width)), - (V_BFE_I32_e64 $src, (i32 0), $width) ->; - // SHA-256 Ma patterns // ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp index d76c2810c39f8c..e669b9479369d6 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.cpp @@ -93,7 +93,7 @@ static const char *ARCCondCodeToString(ARCCC::CondCode CC) { return BadConditionCode(CC); } -void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void ARCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h index baf4a6915b7065..c4bd73448ca71b 100644 --- a/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h +++ b/llvm/lib/Target/ARC/MCTargetDesc/ARCInstPrinter.h @@ -30,7 +30,7 @@ class ARCInstPrinter : public MCInstPrinter { void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; void printCCOperand(const MCInst *MI, int OpNum, raw_ostream &O); diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp index e4a2f8c8f2ea0c..5a6895a4ab84ef 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp @@ -50,7 +50,7 @@ static unsigned translateShiftImm(unsigned imm) { } static void printRegImmShift(raw_ostream &O, ARM_AM::ShiftOpc ShOpc, - unsigned ShImm, const ARMInstPrinter &printer) { + unsigned ShImm, ARMInstPrinter &printer) { if (ShOpc == ARM_AM::no_shift || (ShOpc == ARM_AM::lsl && !ShImm)) return; O << ", "; @@ -81,7 +81,7 @@ bool ARMInstPrinter::applyTargetSpecificCLOption(StringRef Opt) { return false; } -void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void ARMInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg, DefaultAltIdx); } diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h index 494a644cf54546..cd1dddc5f331a3 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.h @@ -27,7 +27,7 @@ class ARMInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; // Autogenerated by tblgen. std::pair getMnemonic(const MCInst *MI) override; diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td index 9809caa8bd8f65..f88daeed8d4215 100644 --- a/llvm/lib/Target/CSKY/CSKY.td +++ b/llvm/lib/Target/CSKY/CSKY.td @@ -97,28 +97,28 @@ def iHasFLOAT7E60 : Predicate<"Subtarget->hasFLOAT7E60()">, "Support CSKY float7e60 instructions">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", - "Enable divide instrutions">; + "Enable divide instructions">; def HasHWDiv : Predicate<"Subtarget->hasHardwareDivide()">, AssemblerPredicate<(all_of FeatureHWDiv), - "Enable divide instrutions">; + "Enable divide instructions">; def FeatureSTM : SubtargetFeature<"multiple_stld", "HasSTM", "true", - "Enable multiple load/store instrutions">; + "Enable multiple load/store instructions">; def HasSTM : Predicate<"Subtarget->hasSTM()">, AssemblerPredicate<(all_of FeatureSTM), - "Enable multiple load/store instrutions">; + "Enable multiple load/store instructions">; def FeaturePushPop : SubtargetFeature<"pushpop", "HasPushPop", "true", - "Enable push/pop instrutions">; + "Enable push/pop instructions">; def HasPushPop : Predicate<"Subtarget->hasPushPop()">, AssemblerPredicate<(all_of FeaturePushPop), - "Enable push/pop instrutions">; + "Enable push/pop instructions">; def FeatureDSP - : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instrutions">; + : SubtargetFeature<"edsp", "HasDSP", "true", "Enable DSP instructions">; def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<(all_of FeatureDSP), - "Enable DSP instrutions">; + "Enable DSP instructions">; def HasDSP1E2 : SubtargetFeature<"dsp1e2", "HasDSP1E2", "true", "Support CSKY dsp1e2 instructions">; @@ -133,16 +133,16 @@ def iHasDSPE60 : Predicate<"Subtarget->hasDSPE60()">, "Support CSKY dspe60 instructions">; def FeatureDSPV2 : SubtargetFeature<"dspv2", "HasDSPV2", "true", - "Enable DSP V2.0 instrutions">; + "Enable DSP V2.0 instructions">; def HasDSPV2 : Predicate<"Subtarget->hasDSPV2()">, AssemblerPredicate<(all_of FeatureDSPV2), - "Enable DSP V2.0 instrutions">; + "Enable DSP V2.0 instructions">; def FeatureDSP_Silan : SubtargetFeature<"dsp_silan", "HasDSP_Silan", "true", - "Enable DSP Silan instrutions">; + "Enable DSP Silan instructions">; def HasDSP_Silan : Predicate<"Subtarget->hasDSP_Silan()">, AssemblerPredicate<(all_of FeatureDSP_Silan), - "Enable DSP Silan instrutions">; + "Enable DSP Silan instructions">; // Atomic Support def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true", @@ -232,11 +232,11 @@ def FeatureSoftTP : SubtargetFeature<"soft-tp", "ReadTPHard", "false", "Disable TLS Pointer register">; def FeatureIstack : SubtargetFeature<"istack", "EnableInterruptAttribute", - "true", "Enable interrput attribute">; + "true", "Enable interrupt attribute">; def EnableInterruptAttribute : Predicate<"Subtarget->enableInterruptAttribute()">, AssemblerPredicate<(all_of FeatureIstack), - "Enable interrput attribute">; + "Enable interrupt attribute">; def FeatureConstPool : SubtargetFeature<"constpool", "DumpConstPool", "true", "Dump the constant pool by compiler">; diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp index 9af7958112fce3..a4b0d8488cf53b 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp @@ -82,7 +82,7 @@ void CSKYInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void CSKYInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { if (PrintBranchImmAsAddress) O << getRegisterName(Reg, ABIRegNames ? CSKY::ABIRegAltName : CSKY::NoRegAltName); diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h index 461d7f6f12b371..16eccfdfb5ce5b 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h @@ -31,7 +31,7 @@ class CSKYInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier = nullptr); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index de5c8b86978a82..e4e84a80b5d0bc 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -26,7 +26,7 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void HexagonInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 38a9081c93fe79..fe37cd91dabc6a 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -30,7 +30,7 @@ class HexagonInstPrinter : public MCInstPrinter { void printInst(MCInst const *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; static char const *getRegisterName(MCRegister Reg); diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp index 0265a75fb346c9..4b5751eaedda05 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "LanaiGenAsmWriter.inc" -void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void LanaiInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h index 55a254036fee5b..851613b27e3dd9 100644 --- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h +++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiInstPrinter.h @@ -49,7 +49,7 @@ class LanaiInstPrinter : public MCInstPrinter { unsigned OpIdx, unsigned PrintMethodIdx, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; private: bool printAlias(const MCInst *MI, raw_ostream &Ostream); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp index cb2521db5217e8..e3007cfe3d401b 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp @@ -56,7 +56,7 @@ void LoongArchInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void LoongArchInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << '$' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h index 4e6092bfcb1282..8cda3fdb4510e5 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h @@ -28,7 +28,7 @@ class LoongArchInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printAtomicMemOp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp index 84800fc762cbb8..68ac15b57508c1 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp @@ -41,7 +41,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "M68kGenAsmWriter.inc" -void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void M68kInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << "%" << getRegisterName(Reg); } diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h index 0963176304587c..d6d17ca9568e02 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h @@ -34,7 +34,7 @@ class M68kInstPrinter : public MCInstPrinter, void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp index 3726c600f4a7b8..d8a27f34c6fd13 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp @@ -26,7 +26,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "MSP430GenAsmWriter.inc" -void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h index 40605b92bcb01a..413492b8efeeda 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h @@ -22,7 +22,7 @@ namespace llvm { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp index 1518a539782efb..2fd1b344eb687e 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.cpp @@ -72,7 +72,7 @@ const char* Mips::MipsFCCToString(Mips::CondCode CC) { llvm_unreachable("Impossible condition code!"); } -void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void MipsInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << '$' << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h index 0652b237509fe3..8e3b4614a4aade 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsInstPrinter.h @@ -84,7 +84,7 @@ class MipsInstPrinter : public MCInstPrinter { const MCSubtargetInfo &STI, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp index 9b589284463294..4211ae5a2eebcd 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp @@ -34,7 +34,7 @@ NVPTXInstPrinter::NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} -void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // Decode the virtual register // Must be kept in sync with NVPTXAsmPrinter::encodeVirtualRegister unsigned RCId = (Reg.id() >> 28); diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h index e8a4a6dbdd5324..63207e8a975ace 100644 --- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h +++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h @@ -24,7 +24,7 @@ class NVPTXInstPrinter : public MCInstPrinter { NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index a95cba586b8fc3..01abf9591e342f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -1335,8 +1335,6 @@ NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const { if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) return TypeSplitVector; - if (Isv2x16VT(VT)) - return TypeLegal; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 5f6cba397c5352..1ca3aefb0b0934 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1922,7 +1922,7 @@ def imem : Operand { let PrintMethod = "printOperand"; } -def imemAny : Operand { +def imemAny : Operand { let PrintMethod = "printOperand"; } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 1d6f39b290536a..a5a147da8da1c5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -332,7 +332,7 @@ void NVPTXPassConfig::addIRPasses() { disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineLateInstrsCleanupID); disablePass(&MachineCopyPropagationID); - disablePass(&TailDuplicateID); + disablePass(&TailDuplicateLegacyID); disablePass(&StackMapLivenessID); disablePass(&PostRAMachineSinkingID); disablePass(&PostRASchedulerID); @@ -461,7 +461,7 @@ void NVPTXPassConfig::addOptimizedRegAlloc() { void NVPTXPassConfig::addMachineSSAOptimization() { // Pre-ra tail duplication. - if (addPass(&EarlyTailDuplicateID)) + if (addPass(&EarlyTailDuplicateLegacyID)) printAndVerify("After Pre-RegAlloc TailDuplicate"); // Optimize PHIs before DCE: removing dead PHI cycles may make more diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp index 9a4291c90408d6..7511e24f705c18 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp @@ -47,7 +47,7 @@ FullRegNamesWithPercent("ppc-reg-with-percent-prefix", cl::Hidden, #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" -void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void PPCInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { const char *RegName = getRegisterName(Reg); OS << RegName; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h index 6ba3eb4c79dc99..1b9365fa04961c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h @@ -31,7 +31,7 @@ class PPCInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI, Triple T) : MCInstPrinter(MAI, MII, MRI), TT(T) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index ab31898e262e7e..d8f3095ed7fb68 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -200,6 +200,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::UADDO, isPPC64 ? MVT::i64 : MVT::i32, Custom); + // On P10, the default lowering generates better code using the + // setbc instruction. + if (!Subtarget.hasP10Vector() && isPPC64) + setOperationAction(ISD::SSUBO, MVT::i32, Custom); + // Match BITREVERSE to customized fast code sequence in the td file. setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i64, Legal); @@ -12016,6 +12021,36 @@ SDValue PPCTargetLowering::LowerUaddo(SDValue Op, SelectionDAG &DAG) const { return Res; } +SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const { + + SDLoc dl(Op); + + SDValue LHS64 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Op.getOperand(0)); + SDValue RHS64 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Op.getOperand(1)); + + SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i64, LHS64, RHS64); + + SDValue Extsw = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, Sub, + DAG.getValueType(MVT::i32)); + + SDValue Xor = DAG.getNode(ISD::XOR, dl, MVT::i64, Extsw, Sub); + + SDValue Addic = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(MVT::i64, MVT::Glue), + Xor, DAG.getConstant(-1, dl, MVT::i64)); + + SDValue Overflow = + DAG.getNode(ISD::SUBE, dl, DAG.getVTList(MVT::i64, MVT::Glue), Xor, Addic, + Addic.getValue(1)); + + SDValue OverflowTrunc = + DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow); + SDValue SubTrunc = + (Sub->getValueType(0) != Op.getNode()->getValueType(0)) + ? DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(0), Sub) + : Sub; + return DAG.getMergeValues({SubTrunc, OverflowTrunc}, dl); +} + /// LowerOperation - Provide custom lowering hooks for some operations. /// SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -12038,6 +12073,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); + case ISD::SSUBO: + return LowerSSUBO(Op, DAG); case ISD::INLINEASM: case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 0adbad86845973..dde45e4cf6f4ae 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1279,6 +1279,7 @@ namespace llvm { SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUaddo(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index d82f78498418da..19103e219cb800 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -330,7 +330,20 @@ enum OperandType : unsigned { OPERAND_RVKRNUM_1_10, OPERAND_RVKRNUM_2_14, OPERAND_SPIMM, - OPERAND_LAST_RISCV_IMM = OPERAND_SPIMM, + // Operand is a 3-bit rounding mode, '111' indicates FRM register. + // Represents 'frm' argument passing to floating-point operations. + OPERAND_FRMARG, + // Operand is a 3-bit rounding mode where only RTZ is valid. + OPERAND_RTZARG, + // Condition code used by select and short forward branch pseudos. + OPERAND_COND_CODE, + // Vector policy operand. + OPERAND_VEC_POLICY, + // Vector SEW operand. + OPERAND_SEW, + // Vector rounding mode for VXRM or FRM. + OPERAND_VEC_RM, + OPERAND_LAST_RISCV_IMM = OPERAND_VEC_RM, // Operand is either a register or uimm5, this is used by V extension pseudo // instructions to represent a value that be passed as AVL to either vsetvli // or vsetivli. diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index 1f27c934baf0dc..1445e9da4a622d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -75,7 +75,7 @@ void RISCVInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void RISCVInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { markup(O, Markup::Register) << getRegisterName(Reg); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h index 77cc7a67e88920..c15fd591b9e956 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h @@ -28,7 +28,7 @@ class RISCVInstPrinter : public MCInstPrinter { void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O, const char *Modifier = nullptr); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index d70903519ecb05..f5851f37154519 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -42,10 +42,19 @@ RISCVFrameLowering::RISCVFrameLowering(const RISCVSubtarget &STI) /*TransientStackAlignment=*/getABIStackAlignment(STI.getTargetABI())), STI(STI) {} +// The register used to hold the frame pointer. +static constexpr Register FPReg = RISCV::X8; + +// The register used to hold the stack pointer. +static constexpr Register SPReg = RISCV::X2; + +// The register used to hold the return address. +static constexpr Register RAReg = RISCV::X1; + // Offsets which need to be scale by XLen representing locations of CSRs which // are given a fixed location by save/restore libcalls or Zcmp Push/Pop. static const std::pair FixedCSRFIMap[] = { - {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, + {/*ra*/ RAReg, -1}, {/*s0*/ FPReg, -2}, {/*s1*/ RISCV::X9, -3}, {/*s2*/ RISCV::X18, -4}, {/*s3*/ RISCV::X19, -5}, {/*s4*/ RISCV::X20, -6}, {/*s5*/ RISCV::X21, -7}, {/*s6*/ RISCV::X22, -8}, @@ -187,6 +196,7 @@ static int getLibCallID(const MachineFunction &MF, switch (MaxReg) { default: llvm_unreachable("Something has gone wrong!"); + // clang-format off case /*s11*/ RISCV::X27: return 12; case /*s10*/ RISCV::X26: return 11; case /*s9*/ RISCV::X25: return 10; @@ -198,8 +208,9 @@ static int getLibCallID(const MachineFunction &MF, case /*s3*/ RISCV::X19: return 4; case /*s2*/ RISCV::X18: return 3; case /*s1*/ RISCV::X9: return 2; - case /*s0*/ RISCV::X8: return 1; - case /*ra*/ RISCV::X1: return 0; + case /*s0*/ FPReg: return 1; + case /*ra*/ RAReg: return 0; + // clang-format on } } @@ -284,9 +295,9 @@ getPushPopEncodingAndNum(const Register MaxReg) { return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S2, 4); case RISCV::X9: /*s1*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0_S1, 3); - case RISCV::X8: /*s0*/ + case FPReg: /*s0*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA_S0, 2); - case RISCV::X1: /*ra*/ + case RAReg: /*ra*/ return std::make_pair(llvm::RISCVZC::RLISTENCODE::RA, 1); } } @@ -372,12 +383,6 @@ uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding( return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign()); } -// Returns the register used to hold the frame pointer. -static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; } - -// Returns the register used to hold the stack pointer. -static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; } - static SmallVector getUnmanagedCSI(const MachineFunction &MF, const std::vector &CSI) { @@ -415,8 +420,6 @@ void RISCVFrameLowering::adjustStackForRVV(MachineFunction &MF, MachineInstr::MIFlag Flag) const { assert(Amount != 0 && "Did not need to adjust stack pointer for RVV."); - const Register SPReg = getSPReg(STI); - // Optimize compile time offset case StackOffset Offset = StackOffset::getScalable(Amount); if (auto VLEN = STI.getRealVLen()) { @@ -479,7 +482,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true); Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); Expr.push_back(0); - if (Reg == RISCV::X2) + if (Reg == SPReg) Comment << "sp"; else Comment << printReg(Reg, &TRI); @@ -530,8 +533,6 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); - Register FPReg = getFPReg(STI); - Register SPReg = getSPReg(STI); Register BPReg = RISCVABI::getBPReg(); // Debug location must be unknown since the first debug location is used @@ -762,8 +763,6 @@ void RISCVFrameLowering::deallocateStack(MachineFunction &MF, int64_t CFAOffset) const { const RISCVRegisterInfo *RI = STI.getRegisterInfo(); - Register SPReg = getSPReg(STI); - RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize), MachineInstr::FrameDestroy, getStackAlign()); } @@ -773,8 +772,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, const RISCVRegisterInfo *RI = STI.getRegisterInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); auto *RVFI = MF.getInfo(); - Register FPReg = getFPReg(STI); - Register SPReg = getSPReg(STI); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -922,7 +919,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } if (FI >= MinCSFI && FI <= MaxCSFI) { - FrameReg = RISCV::X2; + FrameReg = SPReg; if (FirstSPAdjustAmount) Offset += StackOffset::getFixed(FirstSPAdjustAmount); @@ -969,13 +966,13 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } else { // VarSize objects must be empty in this case! assert(!MFI.hasVarSizedObjects()); - FrameReg = RISCV::X2; + FrameReg = SPReg; } } else { FrameReg = RI->getFrameRegister(MF); } - if (FrameReg == getFPReg(STI)) { + if (FrameReg == FPReg) { Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize()); // When using FP to access scalable vector objects, we need to minus // the frame size. @@ -1067,8 +1064,8 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // Unconditionally spill RA and FP only if the function uses a frame // pointer. if (hasFP(MF)) { - SavedRegs.set(RISCV::X1); - SavedRegs.set(RISCV::X8); + SavedRegs.set(RAReg); + SavedRegs.set(FPReg); } // Mark BP as used if function has dedicated base pointer. if (hasBP(MF)) @@ -1328,7 +1325,6 @@ bool RISCVFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { MachineBasicBlock::iterator RISCVFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { - Register SPReg = RISCV::X2; DebugLoc DL = MI->getDebugLoc(); if (!hasReservedCallFrame(MF)) { diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index dc3f8254cb4e00..6291842e071a3e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -693,7 +693,7 @@ bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) { // The constants that can be encoded in the THeadMemIdx instructions // are of the form (sign_extend(imm5) << imm2). - int64_t Shift; + unsigned Shift; for (Shift = 0; Shift < 4; Shift++) if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) break; @@ -3366,7 +3366,7 @@ bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2) { if (auto *C = dyn_cast(N)) { int64_t Offset = C->getSExtValue(); - int64_t Shift; + unsigned Shift; for (Shift = 0; Shift < 4; Shift++) if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0)) break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index a3963fadf3e417..04bb964bfc48cf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2536,6 +2536,28 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_SPIMM: Ok = (Imm & 0xf) == 0; break; + case RISCVOp::OPERAND_FRMARG: + Ok = RISCVFPRndMode::isValidRoundingMode(Imm); + break; + case RISCVOp::OPERAND_RTZARG: + Ok = Imm == RISCVFPRndMode::RTZ; + break; + case RISCVOp::OPERAND_COND_CODE: + Ok = Imm >= 0 && Imm < RISCVCC::COND_INVALID; + break; + case RISCVOp::OPERAND_VEC_POLICY: + Ok = (Imm & (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) == Imm; + break; + case RISCVOp::OPERAND_SEW: + Ok = Imm == 0 || (Imm >= 3 && Imm <= 6); + break; + case RISCVOp::OPERAND_VEC_RM: + assert(RISCVII::hasRoundModeOp(Desc.TSFlags)); + if (RISCVII::usesVXRM(Desc.TSFlags)) + Ok = isUInt<2>(Imm); + else + Ok = RISCVFPRndMode::isValidRoundingMode(Imm); + break; } if (!Ok) { ErrInfo = "Invalid immediate"; @@ -2608,6 +2630,13 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, } } + if (int Idx = RISCVII::getFRMOpNum(Desc); + Idx >= 0 && MI.getOperand(Idx).getImm() == RISCVFPRndMode::DYN && + !MI.readsRegister(RISCV::FRM, /*TRI=*/nullptr)) { + ErrInfo = "dynamic rounding mode should read FRM"; + return false; + } + return true; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 86cc638fd04ac2..a867368235584c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -387,6 +387,11 @@ def csr_sysreg : RISCVOp, TImmLeaf(Imm);"> { // A parameterized register class alternative to i32imm/i64imm from Target.td. def ixlenimm : Operand; +// Condition code used by select and short forward branch pseudos. +def cond_code : RISCVOp { + let OperandType = "OPERAND_COND_CODE"; +} + def ixlenimm_li : Operand { let ParserMatchClass = ImmXLenAsmOperand<"", "LI">; } @@ -1450,7 +1455,7 @@ def riscv_selectcc_frag : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, multiclass SelectCC_GPR_rrirr { let usesCustomInserter = 1 in def _Using_CC_GPR : Pseudo<(outs valty:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, valty:$truev, valty:$falsev), [(set valty:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index a134f37c774954..da3f207a2faf72 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -134,6 +134,8 @@ def frmarg : Operand { let ParserMatchClass = FRMArg; let PrintMethod = "printFRMArg"; let DecoderMethod = "decodeFRMArg"; + let OperandType = "OPERAND_FRMARG"; + let OperandNamespace = "RISCVOp"; } // Variants of the rounding mode operand that default to 'rne'. This is used @@ -154,6 +156,8 @@ def frmarglegacy : Operand { let ParserMatchClass = FRMArgLegacy; let PrintMethod = "printFRMArgLegacy"; let DecoderMethod = "decodeFRMArg"; + let OperandType = "OPERAND_FRMARG"; + let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index f25dc7302608ba..16cc0e5a61f0bc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -15,7 +15,7 @@ let Predicates = [HasShortForwardBranchOpt], isSelect = 1, // This instruction moves $truev to $dst when the condition is true. It will // be expanded to control flow in RISCVExpandPseudoInsts. def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$truev), [(set GPR:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), @@ -34,7 +34,7 @@ let Predicates = [HasConditionalMoveFusion, NoShortForwardBranchOpt], // be expanded to control flow in RISCVExpandPseudoInsts. // We use GPRNoX0 because c.mv cannot encode X0. def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPRNoX0:$falsev, GPRNoX0:$truev), [(set GPRNoX0:$dst, (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), @@ -51,143 +51,143 @@ def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in { def PseudoCCADD : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSUB : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRA : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCAND : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCXOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCADDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCANDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCXORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; // RV64I instructions def PseudoCCADDW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSUBW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCADDIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSLLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]>; // Zbb/Zbkb instructions def PseudoCCANDN : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCORN : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; def PseudoCCXNOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc, + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, ReadSFBALU]>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index af4f653f57afd5..19557d424d1be9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -84,6 +84,18 @@ def AVL : RegisterOperand { let OperandType = "OPERAND_AVL"; } +def vec_policy : RISCVOp { + let OperandType = "OPERAND_VEC_POLICY"; +} + +def sew : RISCVOp { + let OperandType = "OPERAND_SEW"; +} + +def vec_rm : RISCVOp { + let OperandType = "OPERAND_VEC_RM"; +} + // X0 has special meaning for vsetvl/vsetvli. // rd | rs1 | AVL value | Effect on vl //-------------------------------------------------------------- @@ -764,8 +776,8 @@ class GetVTypePredicates { class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -782,7 +794,7 @@ class VPseudoUSLoadMask.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -799,7 +811,7 @@ class VPseudoUSLoadFFNoMask : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -816,7 +828,7 @@ class VPseudoUSLoadFFMask.R:$rd, GPR:$vl), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -833,7 +845,7 @@ class VPseudoSLoadNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -850,7 +862,7 @@ class VPseudoSLoadMask.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLE { let mayLoad = 1; @@ -869,10 +881,10 @@ class VPseudoILoadNoMask LMUL, bit Ordered, bit EarlyClobber, - int TargetConstraintType = 1> : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLX { let mayLoad = 1; @@ -891,11 +903,11 @@ class VPseudoILoadMask LMUL, bit Ordered, bit EarlyClobber, - int TargetConstraintType = 1> : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLX { let mayLoad = 1; @@ -912,7 +924,7 @@ class VPseudoILoadMask : Pseudo<(outs), - (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins StClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -926,7 +938,7 @@ class VPseudoUSStoreMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -940,7 +952,7 @@ class VPseudoSStoreNoMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -954,7 +966,7 @@ class VPseudoSStoreMask : Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSE { let mayLoad = 0; @@ -967,7 +979,7 @@ class VPseudoSStoreMask : Pseudo<(outs RegClass:$rd), (ins RegClass:$passthru, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -981,7 +993,7 @@ class VPseudoNullaryNoMask : class VPseudoNullaryMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -996,7 +1008,7 @@ class VPseudoNullaryMask : // Nullary for pseudo instructions. They are expanded in // RISCVExpandPseudoInsts pass. class VPseudoNullaryPseudoM : - Pseudo<(outs VR:$rd), (ins AVL:$vl, ixlenimm:$sew), []>, + Pseudo<(outs VR:$rd), (ins AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1013,10 +1025,10 @@ class VPseudoNullaryPseudoM : class VPseudoUnaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, OpClass:$rs2, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1031,9 +1043,9 @@ class VPseudoUnaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>, + (ins OpClass:$rs2, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1047,10 +1059,10 @@ class VPseudoUnaryNoMaskNoPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + (ins RetClass:$passthru, OpClass:$rs2, vec_rm:$rm, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1067,10 +1079,10 @@ class VPseudoUnaryNoMaskRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1086,11 +1098,11 @@ class VPseudoUnaryMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, vec_rm:$rm, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1110,7 +1122,7 @@ class VPseudoUnaryMask_NoExcept : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> { + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []> { let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; @@ -1122,49 +1134,9 @@ class VPseudoUnaryMask_NoExcept : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, OpClass:$rs2, ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let Constraints = !interleave([Constraint, "$rd = $passthru"], ","); - let TargetOverlapConstraintType = TargetConstraintType; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasVecPolicyOp = 1; - let HasRoundModeOp = 1; -} - -class VPseudoUnaryMask_FRM : - Pseudo<(outs GetVRegNoV0.R:$rd), - (ins GetVRegNoV0.R:$passthru, OpClass:$rs2, - VMaskOp:$vm, ixlenimm:$frm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, - RISCVVPseudo { - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let Constraints = !interleave([Constraint, "$rd = $passthru"], ","); - let TargetOverlapConstraintType = TargetConstraintType; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasVecPolicyOp = 1; - let UsesMaskPolicy = 1; - let HasRoundModeOp = 1; -} - class VPseudoUnaryNoMaskGPROut : Pseudo<(outs GPR:$rd), - (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>, + (ins VR:$rs2, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1175,7 +1147,7 @@ class VPseudoUnaryNoMaskGPROut : class VPseudoUnaryMaskGPROut : Pseudo<(outs GPR:$rd), - (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + (ins VR:$rs1, VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1189,7 +1161,7 @@ class VPseudoUnaryAnyMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, - VR:$vm, AVL:$vl, ixlenimm:$sew), []>, + VR:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1203,9 +1175,9 @@ class VPseudoBinaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1220,10 +1192,10 @@ class VPseudoBinaryNoMaskPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1239,11 +1211,11 @@ class VPseudoBinaryNoMaskRoundingMode : + bit UsesVXRM_ = 1, + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, vec_rm:$rm, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1260,13 +1232,13 @@ class VPseudoBinaryMaskPolicyRoundingMode : + bit UsesVXRM_, + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, ixlenimm:$rm, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, vec_rm:$rm, AVL:$vl, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1286,10 +1258,10 @@ class VPseudoBinaryMaskPolicyRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), - (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1306,12 +1278,12 @@ class VPseudoTiedBinaryNoMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs2, Op2Class:$rs1, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + vec_rm:$rm, + AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1331,7 +1303,7 @@ class VPseudoIStoreNoMask LMUL, bit Ordered>: Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew),[]>, + sew:$sew),[]>, RISCVVPseudo, RISCVVSX { let mayLoad = 0; @@ -1345,7 +1317,7 @@ class VPseudoIStoreMask LMUL, bit Ordered>: Pseudo<(outs), (ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>, + VMaskOp:$vm, AVL:$vl, sew:$sew),[]>, RISCVVPseudo, RISCVVSX { let mayLoad = 0; @@ -1359,11 +1331,11 @@ class VPseudoBinaryMaskPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1382,7 +1354,7 @@ class VPseudoTernaryMaskPolicy.R:$rd), (ins GetVRegNoV0.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1400,8 +1372,8 @@ class VPseudoTernaryMaskPolicyRoundingMode.R:$passthru, Op1Class:$rs2, Op2Class:$rs1, VMaskOp:$vm, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + vec_rm:$rm, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1419,11 +1391,11 @@ class VPseudoBinaryMOutMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1441,11 +1413,11 @@ class VPseudoBinaryMOutMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op2Class:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1462,13 +1434,13 @@ class VPseudoTiedBinaryMask : + bits<2> TargetConstraintType = 1> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, Op2Class:$rs1, VMaskOp:$vm, - ixlenimm:$rm, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + vec_rm:$rm, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1490,13 +1462,13 @@ class VPseudoBinaryCarry : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), !if(CarryIn, (ins Op1Class:$rs2, Op2Class:$rs1, - VMV0:$carry, AVL:$vl, ixlenimm:$sew), + VMV0:$carry, AVL:$vl, sew:$sew), (ins Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, ixlenimm:$sew)), []>, + AVL:$vl, sew:$sew)), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1512,10 +1484,10 @@ class VPseudoTiedBinaryCarryIn : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, Op1Class:$rs2, Op2Class:$rs1, - VMV0:$carry, AVL:$vl, ixlenimm:$sew), []>, + VMV0:$carry, AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1534,7 +1506,7 @@ class VPseudoTernaryNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1548,10 +1520,10 @@ class VPseudoTernaryNoMaskWithPolicy : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1567,10 +1539,10 @@ class VPseudoTernaryNoMaskWithPolicyRoundingMode : + bits<2> TargetConstraintType = 1> : Pseudo<(outs RetClass:$rd), (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2, - ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + vec_rm:$rm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -1589,7 +1561,7 @@ class VPseudoUSSegLoadNoMask NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1606,7 +1578,7 @@ class VPseudoUSSegLoadMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1624,7 +1596,7 @@ class VPseudoUSSegLoadFFNoMask NF> : Pseudo<(outs RetClass:$rd, GPR:$vl), (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1641,7 +1613,7 @@ class VPseudoUSSegLoadFFMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd, GPR:$vl), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy), []>, + VMaskOp:$vm, AVL:$avl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1659,7 +1631,7 @@ class VPseudoSSegLoadNoMask NF> : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMem:$rs1, GPR:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1676,8 +1648,8 @@ class VPseudoSSegLoadMask NF> : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + GPR:$offset, VMaskOp:$vm, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLSEG { let mayLoad = 1; @@ -1698,7 +1670,7 @@ class VPseudoISegLoadNoMask : Pseudo<(outs RetClass:$rd), (ins RetClass:$passthru, GPRMem:$rs1, IdxClass:$offset, AVL:$vl, - ixlenimm:$sew, ixlenimm:$policy), []>, + sew:$sew, vec_policy:$policy), []>, RISCVVPseudo, RISCVVLXSEG { let mayLoad = 1; @@ -1720,8 +1692,8 @@ class VPseudoISegLoadMask : Pseudo<(outs GetVRegNoV0.R:$rd), (ins GetVRegNoV0.R:$passthru, GPRMem:$rs1, - IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, - ixlenimm:$policy), []>, + IdxClass:$offset, VMaskOp:$vm, AVL:$vl, sew:$sew, + vec_policy:$policy), []>, RISCVVPseudo, RISCVVLXSEG { let mayLoad = 1; @@ -1740,7 +1712,7 @@ class VPseudoUSSegStoreNoMask NF> : Pseudo<(outs), - (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew), []>, + (ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1755,7 +1727,7 @@ class VPseudoUSSegStoreMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1770,7 +1742,7 @@ class VPseudoSSegStoreNoMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, GPR:$offset, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1785,7 +1757,7 @@ class VPseudoSSegStoreMask NF> : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSSEG { let mayLoad = 0; @@ -1803,7 +1775,7 @@ class VPseudoISegStoreNoMask : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSXSEG { let mayLoad = 0; @@ -1821,7 +1793,7 @@ class VPseudoISegStoreMask : Pseudo<(outs), (ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, + VMaskOp:$vm, AVL:$vl, sew:$sew), []>, RISCVVPseudo, RISCVVSXSEG { let mayLoad = 0; @@ -2097,7 +2069,7 @@ multiclass VPseudoBinary TargetConstraintType = 1, bit Commutable = 0> { let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); @@ -2115,8 +2087,8 @@ multiclass VPseudoBinaryRoundingMode TargetConstraintType = 1, bit Commutable = 0> { let VLMul = MInfo.value, SEW=sew, isCommutable = Commutable in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); @@ -2139,7 +2111,7 @@ multiclass VPseudoBinaryM TargetConstraintType = 1, bit Commutable = 0> { let VLMul = MInfo.value, isCommutable = Commutable in { def "_" # MInfo.MX : VPseudoBinaryNoMask { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value in { def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask; @@ -2187,7 +2159,7 @@ multiclass VPseudoTiedBinaryRoundingMode { + bits<2> TargetConstraintType = 1> { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let VLMul = MInfo.value in { def suffix # "_TIED": @@ -2409,7 +2381,7 @@ multiclass VPseudoBinaryV_WI_RM { multiclass VPseudoBinaryV_VM { + bits<2> TargetConstraintType = 1> { let isCommutable = Commutable in def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry { } multiclass VPseudoBinaryV_XM { + string Constraint = "", bits<2> TargetConstraintType = 1> { def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry { + string Constraint = "", bits<2> TargetConstraintType = 1> { def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX : VPseudoBinaryCarry TargetConstraintType = 1, bit Commutable = 0> { defm _VV : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VX { +multiclass VPseudoBinaryM_VX TargetConstraintType = 1> { defm "_VX" : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VF { +multiclass VPseudoBinaryM_VF TargetConstraintType = 1> { defm "_V" # f.FX : VPseudoBinaryM; } -multiclass VPseudoBinaryM_VI { +multiclass VPseudoBinaryM_VI TargetConstraintType = 1> { defm _VI : VPseudoBinaryM; } @@ -3194,7 +3166,7 @@ multiclass VPseudoTernaryWithPolicy { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value in { let isCommutable = Commutable in def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy; @@ -3210,7 +3182,7 @@ multiclass VPseudoTernaryWithPolicyRoundingMode { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let isCommutable = Commutable in @@ -3540,7 +3512,7 @@ multiclass VPseudoConversion { + bits<2> TargetConstraintType = 1> { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); let VLMul = MInfo.value, SEW=sew in { def suffix : VPseudoUnaryNoMask; @@ -3555,7 +3527,7 @@ multiclass VPseudoConversionRoundingMode { + bits<2> TargetConstraintType = 1> { let VLMul = MInfo.value, SEW=sew in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoUnaryNoMaskRoundingMode; @@ -3566,23 +3538,6 @@ multiclass VPseudoConversionRoundingMode { - let VLMul = MInfo.value, SEW=sew in { - defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoUnaryNoMask_FRM; - def suffix # "_MASK" : VPseudoUnaryMask_FRM, - RISCVMaskedPseudo; - } -} - multiclass VPseudoConversionNoExcept, - SchedUnary<"WriteVFCvtFToIV", "ReadVFCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVFROUND_NOEXCEPT_V { foreach m = MxListF in { defm _V : VPseudoConversionNoExcept, @@ -3633,15 +3580,6 @@ multiclass VPseudoVCVTF_V_RM { } } -multiclass VPseudoVCVTF_RM_V { - foreach m = MxListF in { - foreach e = SchedSEWSet.val in - defm _V : VPseudoConversionRM, - SchedUnary<"WriteVFCvtIToFV", "ReadVFCvtIToFV", m.MX, e, - forcePassthruRead=true>; - } -} - multiclass VPseudoVWCVTI_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -3660,15 +3598,6 @@ multiclass VPseudoVWCVTI_V_RM { } } -multiclass VPseudoVWCVTI_RM_V { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListFW in { - defm _V : VPseudoConversionRM, - SchedUnary<"WriteVFWCvtFToIV", "ReadVFWCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVWCVTF_V { defvar constraint = "@earlyclobber $rd"; foreach m = MxListW in { @@ -3709,15 +3638,6 @@ multiclass VPseudoVNCVTI_W_RM { } } -multiclass VPseudoVNCVTI_RM_W { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListW in { - defm _W : VPseudoConversionRM, - SchedUnary<"WriteVFNCvtFToIV", "ReadVFNCvtFToIV", m.MX, - forcePassthruRead=true>; - } -} - multiclass VPseudoVNCVTF_W_RM { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -3730,17 +3650,6 @@ multiclass VPseudoVNCVTF_W_RM { } } -multiclass VPseudoVNCVTF_RM_W { - defvar constraint = "@earlyclobber $rd"; - foreach m = MxListFW in { - foreach e = SchedSEWSet.val in - defm _W : VPseudoConversionRM, - SchedUnary<"WriteVFNCvtIToFV", "ReadVFNCvtIToFV", m.MX, e, - forcePassthruRead=true>; - } -} - multiclass VPseudoVNCVTD_W { defvar constraint = "@earlyclobber $rd"; foreach m = MxListFW in { @@ -6471,7 +6380,7 @@ defm PseudoVFRDIV : VPseudoVFRDIV_VF_RM; //===----------------------------------------------------------------------===// // 13.5. Vector Widening Floating-Point Multiply //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in { +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFWMUL : VPseudoVWMUL_VV_VF_RM; } @@ -6504,7 +6413,7 @@ defm PseudoVFWMACCBF16 : VPseudoVWMAC_VV_VF_BF_RM; //===----------------------------------------------------------------------===// // 13.8. Vector Floating-Point Square-Root Instruction //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFSQRT : VPseudoVSQR_V_RM; //===----------------------------------------------------------------------===// @@ -6516,7 +6425,7 @@ defm PseudoVFRSQRT7 : VPseudoVRCP_V; //===----------------------------------------------------------------------===// // 13.10. Vector Floating-Point Reciprocal Estimate Instruction //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, hasSideEffects = 0 in +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in defm PseudoVFREC7 : VPseudoVRCP_V_RM; //===----------------------------------------------------------------------===// @@ -6571,9 +6480,6 @@ defm PseudoVFCVT_XU_F : VPseudoVCVTI_V_RM; defm PseudoVFCVT_X_F : VPseudoVCVTI_V_RM; } -defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V; -defm PseudoVFCVT_RM_X_F : VPseudoVCVTI_RM_V; - defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V; defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V; @@ -6582,8 +6488,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFCVT_F_XU : VPseudoVCVTF_V_RM; defm PseudoVFCVT_F_X : VPseudoVCVTF_V_RM; } -defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V; -defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V; } // mayRaiseFPException = true //===----------------------------------------------------------------------===// @@ -6594,8 +6498,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V_RM; defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V_RM; } -defm PseudoVFWCVT_RM_XU_F : VPseudoVWCVTI_RM_V; -defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V; defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V; defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V; @@ -6615,8 +6517,6 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W_RM; defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W_RM; } -defm PseudoVFNCVT_RM_XU_F : VPseudoVNCVTI_RM_W; -defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W; defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W; defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W; @@ -6625,12 +6525,11 @@ let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W_RM; defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W_RM; } -defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W; -defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W; -let hasSideEffects = 0, hasPostISelHook = 1 in +let hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM; defm PseudoVFNCVTBF16_F_F : VPseudoVNCVTD_W_RM; +} defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W; } // mayRaiseFPException = true @@ -6666,8 +6565,7 @@ let Predicates = [HasVInstructionsAnyF] in { //===----------------------------------------------------------------------===// // 14.3. Vector Single-Width Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// -let mayRaiseFPException = true, - hasSideEffects = 0 in { +let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in { defm PseudoVFREDOSUM : VPseudoVFREDO_VS_RM; defm PseudoVFREDUSUM : VPseudoVFRED_VS_RM; } @@ -6679,9 +6577,8 @@ defm PseudoVFREDMAX : VPseudoVFREDMINMAX_VS; //===----------------------------------------------------------------------===// // 14.4. Vector Widening Floating-Point Reduction Instructions //===----------------------------------------------------------------------===// -let IsRVVWideningReduction = 1, - hasSideEffects = 0, - mayRaiseFPException = true in { +let IsRVVWideningReduction = 1, hasSideEffects = 0, mayRaiseFPException = true, + hasPostISelHook = 1 in { defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM; defm PseudoVFWREDOSUM : VPseudoVFWREDO_VS_RM; } @@ -6762,13 +6659,13 @@ let Predicates = [HasVInstructions] in { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let HasSEWOp = 1, BaseInstr = VMV_X_S in def PseudoVMV_X_S: - Pseudo<(outs GPR:$rd), (ins VR:$rs2, ixlenimm:$sew), []>, + Pseudo<(outs GPR:$rd), (ins VR:$rs2, sew:$sew), []>, Sched<[WriteVMovXS, ReadVMovXS]>, RISCVVPseudo; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X, isReMaterializable = 1, Constraints = "$rd = $rs1" in def PseudoVMV_S_X: Pseudo<(outs VR:$rd), - (ins VR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew), + (ins VR:$rs1, GPR:$rs2, AVL:$vl, sew:$sew), []>, Sched<[WriteVMovSX, ReadVMovSX_V, ReadVMovSX_X]>, RISCVVPseudo; @@ -6785,14 +6682,14 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { let HasSEWOp = 1, BaseInstr = VFMV_F_S in def "PseudoVFMV_" # f.FX # "_S" : Pseudo<(outs f.fprclass:$rd), - (ins VR:$rs2, ixlenimm:$sew), []>, + (ins VR:$rs2, sew:$sew), []>, Sched<[WriteVMovFS, ReadVMovFS]>, RISCVVPseudo; let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F, isReMaterializable = 1, Constraints = "$rd = $rs1" in def "PseudoVFMV_S_" # f.FX : Pseudo<(outs VR:$rd), - (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, ixlenimm:$sew), + (ins VR:$rs1, f.fprclass:$rs2, AVL:$vl, sew:$sew), []>, Sched<[WriteVMovSF, ReadVMovSF_V, ReadVMovSF_F]>, RISCVVPseudo; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 18749f00a10a52..33e1ed120cd086 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2639,8 +2639,8 @@ foreach fvti = AllFloatVectors in { // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions defm : VPatConvertFP2IVL_V_RM; defm : VPatConvertFP2IVL_V_RM; -defm : VPatConvertFP2I_RM_VL_V; -defm : VPatConvertFP2I_RM_VL_V; +defm : VPatConvertFP2I_RM_VL_V; +defm : VPatConvertFP2I_RM_VL_V; defm : VPatConvertFP2IVL_V; defm : VPatConvertFP2IVL_V; @@ -2648,14 +2648,14 @@ defm : VPatConvertFP2IVL_V; defm : VPatConvertI2FPVL_V_RM; defm : VPatConvertI2FPVL_V_RM; -defm : VPatConvertI2FP_RM_VL_V; -defm : VPatConvertI2FP_RM_VL_V; +defm : VPatConvertI2FP_RM_VL_V; +defm : VPatConvertI2FP_RM_VL_V; // 13.18. Widening Floating-Point/Integer Type-Convert Instructions defm : VPatWConvertFP2IVL_V_RM; defm : VPatWConvertFP2IVL_V_RM; -defm : VPatWConvertFP2I_RM_VL_V; -defm : VPatWConvertFP2I_RM_VL_V; +defm : VPatWConvertFP2I_RM_VL_V; +defm : VPatWConvertFP2I_RM_VL_V; defm : VPatWConvertFP2IVL_V; defm : VPatWConvertFP2IVL_V; @@ -2696,8 +2696,8 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions defm : VPatNConvertFP2IVL_W_RM; defm : VPatNConvertFP2IVL_W_RM; -defm : VPatNConvertFP2I_RM_VL_W; -defm : VPatNConvertFP2I_RM_VL_W; +defm : VPatNConvertFP2I_RM_VL_W; +defm : VPatNConvertFP2I_RM_VL_W; defm : VPatNConvertFP2IVL_W; defm : VPatNConvertFP2IVL_W; @@ -2705,8 +2705,8 @@ defm : VPatNConvertFP2IVL_W; defm : VPatNConvertI2FPVL_W_RM; -defm : VPatNConvertI2FP_RM_VL_W; -defm : VPatNConvertI2FP_RM_VL_W; +defm : VPatNConvertI2FP_RM_VL_W; +defm : VPatNConvertI2FP_RM_VL_W; foreach fvtiToFWti = AllWidenableFloatVectors in { defvar fvti = fvtiToFWti.Vti; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index b54baa16d9286b..4478e246111080 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -818,7 +818,7 @@ let Predicates = [HasVendorXCVbi, IsRV32], AddedComplexity = 2 in { let usesCustomInserter = 1 in def Select_GPR_Using_CC_Imm : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, simm5:$imm5, ixlenimm:$cc, + (ins GPR:$lhs, simm5:$imm5, cond_code:$cc, GPR:$truev, GPR:$falsev), []>; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td index 5068d0be0fb49b..1ad3e1b681466b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td @@ -222,7 +222,8 @@ let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq", def VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">; } -let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf" in { +let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf", + Uses = [FRM] in { def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">; def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">; } @@ -230,7 +231,7 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf class VPseudoVC_X : Pseudo<(outs), (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -243,7 +244,7 @@ class VPseudoVC_X : class VPseudoVC_XV : Pseudo<(outs), (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -257,7 +258,7 @@ class VPseudoVC_XVV : Pseudo<(outs), (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -270,7 +271,7 @@ class VPseudoVC_XVV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -284,7 +285,7 @@ class VPseudoVC_V_XV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -298,7 +299,7 @@ class VPseudoVC_V_XVV : Pseudo<(outs RDClass:$rd), (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1, - AVL:$vl, ixlenimm:$sew), []>, + AVL:$vl, sew:$sew), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -405,7 +406,7 @@ multiclass VPseudoSiFiveVFWMACC { multiclass VPseudoSiFiveVFNRCLIP { foreach i = 0-4 in - let hasSideEffects = 0 in + let hasSideEffects = 0, hasPostISelHook = 1 in defm "Pseudo" # NAME : VPseudoBinaryRoundingMode { let ParserMatchClass = LoadFPImmOperand; let PrintMethod = "printFPImmOperand"; + let OperandType = "OPERAND_UIMM5"; + let OperandNamespace = "RISCVOp"; } def RTZArg : AsmOperandClass { @@ -48,6 +50,8 @@ def rtzarg : Operand { let ParserMatchClass = RTZArg; let PrintMethod = "printFRMArg"; let DecoderMethod = "decodeRTZArg"; + let OperandType = "OPERAND_RTZARG"; + let OperandNamespace = "RISCVOp"; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 7ec13e4eaafa7d..782651fd6d0197 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -231,7 +231,7 @@ class ZvkMxSet { class VPseudoBinaryNoMask_Zvk : Pseudo<(outs RetClass:$rd_wb), - (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + (ins RetClass:$rd, OpClass:$rs2, AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; @@ -248,7 +248,7 @@ class VPseudoTernaryNoMask_Zvk : Pseudo<(outs RetClass:$rd_wb), (ins RetClass:$rd, Op1Class:$rs2, Op2Class:$rs1, - AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, + AVL:$vl, sew:$sew, vec_policy:$policy), []>, RISCVVPseudo { let mayLoad = 0; let mayStore = 0; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 395baa5f1aab99..f050fb569946d6 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -723,8 +723,7 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( // The interleaved memory access pass will lower interleaved memory ops (i.e // a load and store followed by a specific shuffle) to vlseg/vsseg - // intrinsics. In those cases then we can treat it as if it's just one (legal) - // memory op + // intrinsics. if (!UseMaskForCond && !UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) { auto *VTy = cast(VecTy); @@ -734,19 +733,27 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost( auto *SubVecTy = VectorType::get(VTy->getElementType(), VTy->getElementCount().divideCoefficientBy(Factor)); - if (VTy->getElementCount().isKnownMultipleOf(Factor) && TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment, AddressSpace, DL)) { - // FIXME: We use the memory op cost of the *legalized* type here, - // because it's getMemoryOpCost returns a really expensive cost for - // types like <6 x i8>, which show up when doing interleaves of - // Factor=3 etc. Should the memory op cost of these be cheaper? - auto *LegalVTy = VectorType::get(VTy->getElementType(), - LT.second.getVectorElementCount()); - InstructionCost LegalMemCost = getMemoryOpCost( - Opcode, LegalVTy, Alignment, AddressSpace, CostKind); - return LT.first + LegalMemCost; + + // Most available hardware today optimizes NF=2 as as one wide memory op + // + Factor * LMUL shuffle ops. + if (Factor == 2) { + InstructionCost Cost = + getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind); + MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT(); + Cost += Factor * TLI->getLMULCost(SubVecVT); + return LT.first * Cost; + } + + // Otherwise, the cost is proportional to the number of elements (VL * + // Factor ops). + InstructionCost MemOpCost = + getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, + CostKind, {TTI::OK_AnyValue, TTI::OP_None}); + unsigned NumLoads = getEstimatedVLFor(VTy); + return NumLoads * MemOpCost; } } } @@ -948,12 +955,17 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { auto *RetTy = ICA.getReturnType(); switch (ICA.getID()) { + case Intrinsic::lrint: + case Intrinsic::llrint: + // We can't currently lower half or bfloat vector lrint/llrint. + if (auto *VecTy = dyn_cast(ICA.getArgTypes()[0]); + VecTy && VecTy->getElementType()->is16bitFPTy()) + return InstructionCost::getInvalid(); + [[fallthrough]]; case Intrinsic::ceil: case Intrinsic::floor: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::lrint: - case Intrinsic::llrint: case Intrinsic::round: case Intrinsic::roundeven: { // These all use the same code. diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp index 64fde8bf67ab91..62bd8d1f9d2433 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp @@ -713,21 +713,36 @@ Register SPIRVGlobalRegistry::buildGlobalVariable( return Reg; } +static std::string GetSpirvImageTypeName(const SPIRVType *Type, + MachineIRBuilder &MIRBuilder, + const std::string &Prefix); + static std::string buildSpirvTypeName(const SPIRVType *Type, MachineIRBuilder &MIRBuilder) { switch (Type->getOpcode()) { + case SPIRV::OpTypeSampledImage: { + return GetSpirvImageTypeName(Type, MIRBuilder, "sampled_image_"); + } case SPIRV::OpTypeImage: { - Register SampledTypeReg = Type->getOperand(1).getReg(); - auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg); - std::string TypeName = - "image_" + buildSpirvTypeName(SampledType, MIRBuilder); - for (uint32_t I = 2; I < Type->getNumOperands(); ++I) { - TypeName = (TypeName + '_' + Twine(Type->getOperand(I).getImm())).str(); - } - return TypeName; + return GetSpirvImageTypeName(Type, MIRBuilder, "image_"); + } + case SPIRV::OpTypeArray: { + MachineRegisterInfo *MRI = MIRBuilder.getMRI(); + Register ElementTypeReg = Type->getOperand(1).getReg(); + auto *ElementType = MRI->getUniqueVRegDef(ElementTypeReg); + const SPIRVType *TypeInst = MRI->getVRegDef(Type->getOperand(2).getReg()); + assert(TypeInst->getOpcode() != SPIRV::OpConstantI); + MachineInstr *ImmInst = MRI->getVRegDef(TypeInst->getOperand(1).getReg()); + assert(ImmInst->getOpcode() == TargetOpcode::G_CONSTANT); + uint32_t ArraySize = ImmInst->getOperand(1).getCImm()->getZExtValue(); + return (buildSpirvTypeName(ElementType, MIRBuilder) + Twine("[") + + Twine(ArraySize) + Twine("]")) + .str(); } case SPIRV::OpTypeFloat: return ("f" + Twine(Type->getOperand(1).getImm())).str(); + case SPIRV::OpTypeSampler: + return ("sampler"); case SPIRV::OpTypeInt: if (Type->getOperand(2).getImm()) return ("i" + Twine(Type->getOperand(1).getImm())).str(); @@ -737,6 +752,18 @@ static std::string buildSpirvTypeName(const SPIRVType *Type, } } +static std::string GetSpirvImageTypeName(const SPIRVType *Type, + MachineIRBuilder &MIRBuilder, + const std::string &Prefix) { + Register SampledTypeReg = Type->getOperand(1).getReg(); + auto *SampledType = MIRBuilder.getMRI()->getUniqueVRegDef(SampledTypeReg); + std::string TypeName = Prefix + buildSpirvTypeName(SampledType, MIRBuilder); + for (uint32_t I = 2; I < Type->getNumOperands(); ++I) { + TypeName = (TypeName + '_' + Twine(Type->getOperand(I).getImm())).str(); + } + return TypeName; +} + Register SPIRVGlobalRegistry::getOrCreateGlobalVariableWithBinding( const SPIRVType *VarType, uint32_t Set, uint32_t Binding, MachineIRBuilder &MIRBuilder) { diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index d9377fe4b91a1a..526305d7ed28ab 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -260,6 +260,7 @@ class SPIRVInstructionSelector : public InstructionSelector { SPIRVType *SrcPtrTy) const; Register buildPointerToResource(const SPIRVType *ResType, uint32_t Set, uint32_t Binding, uint32_t ArraySize, + Register IndexReg, bool IsNonUniform, MachineIRBuilder MIRBuilder) const; }; @@ -2547,6 +2548,17 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, return selectExtInst(ResVReg, ResType, I, CL::rsqrt, GL::InverseSqrt); case Intrinsic::spv_sign: return selectSign(ResVReg, ResType, I); + case Intrinsic::spv_group_memory_barrier_with_group_sync: { + Register MemSemReg = + buildI32Constant(SPIRV::MemorySemantics::SequentiallyConsistent, I); + Register ScopeReg = buildI32Constant(SPIRV::Scope::Workgroup, I); + MachineBasicBlock &BB = *I.getParent(); + return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpControlBarrier)) + .addUse(ScopeReg) + .addUse(ScopeReg) + .addUse(MemSemReg) + .constrainAllUses(TII, TRI, RBI); + } break; case Intrinsic::spv_lifetime_start: case Intrinsic::spv_lifetime_end: { unsigned Op = IID == Intrinsic::spv_lifetime_start ? SPIRV::OpLifetimeStart @@ -2605,10 +2617,15 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg, uint32_t Set = foldImm(I.getOperand(2), MRI); uint32_t Binding = foldImm(I.getOperand(3), MRI); uint32_t ArraySize = foldImm(I.getOperand(4), MRI); + Register IndexReg = I.getOperand(5).getReg(); + bool IsNonUniform = ArraySize > 1 && foldImm(I.getOperand(6), MRI); MachineIRBuilder MIRBuilder(I); - Register VarReg = - buildPointerToResource(ResType, Set, Binding, ArraySize, MIRBuilder); + Register VarReg = buildPointerToResource(ResType, Set, Binding, ArraySize, + IndexReg, IsNonUniform, MIRBuilder); + + if (IsNonUniform) + buildOpDecorate(ResVReg, I, TII, SPIRV::Decoration::NonUniformEXT, {}); // TODO: For now we assume the resource is an image, which needs to be // loaded to get the handle. That will not be true for storage buffers. @@ -2620,10 +2637,35 @@ void SPIRVInstructionSelector::selectHandleFromBinding(Register &ResVReg, Register SPIRVInstructionSelector::buildPointerToResource( const SPIRVType *ResType, uint32_t Set, uint32_t Binding, - uint32_t ArraySize, MachineIRBuilder MIRBuilder) const { - assert(ArraySize == 1 && "Resource arrays are not implemented yet."); - return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding, - MIRBuilder); + uint32_t ArraySize, Register IndexReg, bool IsNonUniform, + MachineIRBuilder MIRBuilder) const { + if (ArraySize == 1) + return GR.getOrCreateGlobalVariableWithBinding(ResType, Set, Binding, + MIRBuilder); + + const SPIRVType *VarType = GR.getOrCreateSPIRVArrayType( + ResType, ArraySize, *MIRBuilder.getInsertPt(), TII); + Register VarReg = GR.getOrCreateGlobalVariableWithBinding( + VarType, Set, Binding, MIRBuilder); + + SPIRVType *ResPointerType = GR.getOrCreateSPIRVPointerType( + ResType, MIRBuilder, SPIRV::StorageClass::UniformConstant); + + Register AcReg = MRI->createVirtualRegister(&SPIRV::iIDRegClass); + if (IsNonUniform) { + // It is unclear which value needs to be marked an non-uniform, so both + // the index and the access changed are decorated as non-uniform. + buildOpDecorate(IndexReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); + buildOpDecorate(AcReg, MIRBuilder, SPIRV::Decoration::NonUniformEXT, {}); + } + + MIRBuilder.buildInstr(SPIRV::OpAccessChain) + .addDef(AcReg) + .addUse(GR.getSPIRVTypeID(ResPointerType)) + .addUse(VarReg) + .addUse(IndexReg); + + return AcReg; } bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg, diff --git a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp index 9930d067173df7..c22492ec43b095 100644 --- a/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVMergeRegionExitTargets.cpp @@ -130,6 +130,13 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { assert(false && "Unhandled terminator type."); } + AllocaInst *CreateVariable(Function &F, Type *Type, + BasicBlock::iterator Position) { + const DataLayout &DL = F.getDataLayout(); + return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg", + Position); + } + // Run the pass on the given convergence region, ignoring the sub-regions. // Returns true if the CFG changed, false otherwise. bool runOnConvergenceRegionNoRecurse(LoopInfo &LI, @@ -152,6 +159,9 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { auto NewExitTarget = BasicBlock::Create(F->getContext(), "new.exit", F); IRBuilder<> Builder(NewExitTarget); + AllocaInst *Variable = CreateVariable(*F, Builder.getInt32Ty(), + F->begin()->getFirstInsertionPt()); + // CodeGen output needs to be stable. Using the set as-is would order // the targets differently depending on the allocation pattern. // Sorting per basic-block ordering in the function. @@ -176,18 +186,16 @@ class SPIRVMergeRegionExitTargets : public FunctionPass { std::vector> ExitToVariable; for (auto Exit : SortedExits) { llvm::Value *Value = createExitVariable(Exit, TargetToValue); + IRBuilder<> B2(Exit); + B2.SetInsertPoint(Exit->getFirstInsertionPt()); + B2.CreateStore(Value, Variable); ExitToVariable.emplace_back(std::make_pair(Exit, Value)); } - // Gather the correct value depending on the exit we came from. - llvm::PHINode *node = - Builder.CreatePHI(Builder.getInt32Ty(), ExitToVariable.size()); - for (auto [BB, Value] : ExitToVariable) { - node->addIncoming(Value, BB); - } + llvm::Value *Load = Builder.CreateLoad(Builder.getInt32Ty(), Variable); // Creating the switch to jump to the correct exit target. - llvm::SwitchInst *Sw = Builder.CreateSwitch(node, SortedExitTargets[0], + llvm::SwitchInst *Sw = Builder.CreateSwitch(Load, SortedExitTargets[0], SortedExitTargets.size() - 1); for (size_t i = 1; i < SortedExitTargets.size(); i++) { BasicBlock *BB = SortedExitTargets[i]; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index db5463f5c7abb0..29ce60d9983e38 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -689,11 +689,31 @@ void RequirementHandler::initAvailableCapabilitiesForVulkan( const SPIRVSubtarget &ST) { addAvailableCaps({Capability::Shader, Capability::Linkage}); - // Provided by all supported Vulkan versions. + // Core in Vulkan 1.1 and earlier. addAvailableCaps({Capability::Int16, Capability::Int64, Capability::Float16, Capability::Float64, Capability::GroupNonUniform, Capability::Image1D, Capability::SampledBuffer, - Capability::ImageBuffer}); + Capability::ImageBuffer, + Capability::UniformBufferArrayDynamicIndexing, + Capability::SampledImageArrayDynamicIndexing, + Capability::StorageBufferArrayDynamicIndexing, + Capability::StorageImageArrayDynamicIndexing}); + + // Became core in Vulkan 1.2 + if (ST.isAtLeastSPIRVVer(VersionTuple(1, 5))) { + addAvailableCaps( + {Capability::ShaderNonUniformEXT, Capability::RuntimeDescriptorArrayEXT, + Capability::InputAttachmentArrayDynamicIndexingEXT, + Capability::UniformTexelBufferArrayDynamicIndexingEXT, + Capability::StorageTexelBufferArrayDynamicIndexingEXT, + Capability::UniformBufferArrayNonUniformIndexingEXT, + Capability::SampledImageArrayNonUniformIndexingEXT, + Capability::StorageBufferArrayNonUniformIndexingEXT, + Capability::StorageImageArrayNonUniformIndexingEXT, + Capability::InputAttachmentArrayNonUniformIndexingEXT, + Capability::UniformTexelBufferArrayNonUniformIndexingEXT, + Capability::StorageTexelBufferArrayNonUniformIndexingEXT}); + } } } // namespace SPIRV @@ -729,6 +749,8 @@ static void addOpDecorateReqs(const MachineInstr &MI, unsigned DecIndex, Dec == SPIRV::Decoration::ImplementInRegisterMapINTEL) { Reqs.addExtension( SPIRV::Extension::SPV_INTEL_global_variable_fpga_decorations); + } else if (Dec == SPIRV::Decoration::NonUniformEXT) { + Reqs.addRequirements(SPIRV::Capability::ShaderNonUniformEXT); } } @@ -848,6 +870,136 @@ static void AddAtomicFloatRequirements(const MachineInstr &MI, } } +bool isUniformTexelBuffer(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 1; +} + +bool isStorageTexelBuffer(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_Buffer && Sampled == 2; +} + +bool isSampledImage(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim != SPIRV::Dim::DIM_Buffer && Sampled == 1; +} + +bool isInputAttachment(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim == SPIRV::Dim::DIM_SubpassData && Sampled == 2; +} + +bool isStorageImage(MachineInstr *ImageInst) { + if (ImageInst->getOpcode() != SPIRV::OpTypeImage) + return false; + uint32_t Dim = ImageInst->getOperand(2).getImm(); + uint32_t Sampled = ImageInst->getOperand(6).getImm(); + return Dim != SPIRV::Dim::DIM_Buffer && Sampled == 2; +} + +bool isCombinedImageSampler(MachineInstr *SampledImageInst) { + if (SampledImageInst->getOpcode() != SPIRV::OpTypeSampledImage) + return false; + + const MachineRegisterInfo &MRI = SampledImageInst->getMF()->getRegInfo(); + Register ImageReg = SampledImageInst->getOperand(1).getReg(); + auto *ImageInst = MRI.getUniqueVRegDef(ImageReg); + return isSampledImage(ImageInst); +} + +bool hasNonUniformDecoration(Register Reg, const MachineRegisterInfo &MRI) { + for (const auto &MI : MRI.reg_instructions(Reg)) { + if (MI.getOpcode() != SPIRV::OpDecorate) + continue; + + uint32_t Dec = MI.getOperand(1).getImm(); + if (Dec == SPIRV::Decoration::NonUniformEXT) + return true; + } + return false; +} + +void addOpAccessChainReqs(const MachineInstr &Instr, + SPIRV::RequirementHandler &Handler, + const SPIRVSubtarget &Subtarget) { + const MachineRegisterInfo &MRI = Instr.getMF()->getRegInfo(); + // Get the result type. If it is an image type, then the shader uses + // descriptor indexing. The appropriate capabilities will be added based + // on the specifics of the image. + Register ResTypeReg = Instr.getOperand(1).getReg(); + MachineInstr *ResTypeInst = MRI.getUniqueVRegDef(ResTypeReg); + + assert(ResTypeInst->getOpcode() == SPIRV::OpTypePointer); + uint32_t StorageClass = ResTypeInst->getOperand(1).getImm(); + if (StorageClass != SPIRV::StorageClass::StorageClass::UniformConstant && + StorageClass != SPIRV::StorageClass::StorageClass::Uniform && + StorageClass != SPIRV::StorageClass::StorageClass::StorageBuffer) { + return; + } + + Register PointeeTypeReg = ResTypeInst->getOperand(2).getReg(); + MachineInstr *PointeeType = MRI.getUniqueVRegDef(PointeeTypeReg); + if (PointeeType->getOpcode() != SPIRV::OpTypeImage && + PointeeType->getOpcode() != SPIRV::OpTypeSampledImage && + PointeeType->getOpcode() != SPIRV::OpTypeSampler) { + return; + } + + bool IsNonUniform = + hasNonUniformDecoration(Instr.getOperand(0).getReg(), MRI); + if (isUniformTexelBuffer(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::UniformTexelBufferArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::UniformTexelBufferArrayDynamicIndexingEXT); + } else if (isInputAttachment(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::InputAttachmentArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::InputAttachmentArrayDynamicIndexingEXT); + } else if (isStorageTexelBuffer(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::StorageTexelBufferArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::StorageTexelBufferArrayDynamicIndexingEXT); + } else if (isSampledImage(PointeeType) || + isCombinedImageSampler(PointeeType) || + PointeeType->getOpcode() == SPIRV::OpTypeSampler) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::SampledImageArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::SampledImageArrayDynamicIndexing); + } else if (isStorageImage(PointeeType)) { + if (IsNonUniform) + Handler.addRequirements( + SPIRV::Capability::StorageImageArrayNonUniformIndexingEXT); + else + Handler.addRequirements( + SPIRV::Capability::StorageImageArrayDynamicIndexing); + } +} + void addInstrRequirements(const MachineInstr &MI, SPIRV::RequirementHandler &Reqs, const SPIRVSubtarget &ST) { @@ -967,11 +1119,17 @@ void addInstrRequirements(const MachineInstr &MI, case SPIRV::OpConstantSampler: Reqs.addCapability(SPIRV::Capability::LiteralSampler); break; + case SPIRV::OpInBoundsAccessChain: + case SPIRV::OpAccessChain: + addOpAccessChainReqs(MI, Reqs, ST); + break; case SPIRV::OpTypeImage: addOpTypeImageReqs(MI, Reqs, ST); break; case SPIRV::OpTypeSampler: - Reqs.addCapability(SPIRV::Capability::ImageBasic); + if (!ST.isVulkanEnv()) { + Reqs.addCapability(SPIRV::Capability::ImageBasic); + } break; case SPIRV::OpTypeForwardPointer: // TODO: check if it's OpenCL's kernel. diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp index 3c2af34dd55239..cc34cf877dea97 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp @@ -165,6 +165,57 @@ static MachineInstr *findAssignTypeInstr(Register Reg, return nullptr; } +static void buildOpBitcast(SPIRVGlobalRegistry *GR, MachineIRBuilder &MIB, + Register ResVReg, Register OpReg) { + SPIRVType *ResType = GR->getSPIRVTypeForVReg(ResVReg); + SPIRVType *OpType = GR->getSPIRVTypeForVReg(OpReg); + assert(ResType && OpType && "Operand types are expected"); + if (!GR->isBitcastCompatible(ResType, OpType)) + report_fatal_error("incompatible result and operand types in a bitcast"); + MachineRegisterInfo *MRI = MIB.getMRI(); + if (!MRI->getRegClassOrNull(ResVReg)) + MRI->setRegClass(ResVReg, GR->getRegClass(ResType)); + MIB.buildInstr(SPIRV::OpBitcast) + .addDef(ResVReg) + .addUse(GR->getSPIRVTypeID(ResType)) + .addUse(OpReg); +} + +// We do instruction selections early instead of calling MIB.buildBitcast() +// generating the general op code G_BITCAST. When MachineVerifier validates +// G_BITCAST we see a check of a kind: if Source Type is equal to Destination +// Type then report error "bitcast must change the type". This doesn't take into +// account the notion of a typed pointer that is important for SPIR-V where a +// user may and should use bitcast between pointers with different pointee types +// (https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#OpBitcast). +// It's important for correct lowering in SPIR-V, because interpretation of the +// data type is not left to instructions that utilize the pointer, but encoded +// by the pointer declaration, and the SPIRV target can and must handle the +// declaration and use of pointers that specify the type of data they point to. +// It's not feasible to improve validation of G_BITCAST using just information +// provided by low level types of source and destination. Therefore we don't +// produce G_BITCAST as the general op code with semantics different from +// OpBitcast, but rather lower to OpBitcast immediately. As for now, the only +// difference would be that CombinerHelper couldn't transform known patterns +// around G_BUILD_VECTOR. See discussion +// in https://github.com/llvm/llvm-project/pull/110270 for even more context. +static void selectOpBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, + MachineIRBuilder MIB) { + SmallVector ToErase; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() != TargetOpcode::G_BITCAST) + continue; + MIB.setInsertPt(*MI.getParent(), MI); + buildOpBitcast(GR, MIB, MI.getOperand(0).getReg(), + MI.getOperand(1).getReg()); + ToErase.push_back(&MI); + } + } + for (MachineInstr *MI : ToErase) + MI->eraseFromParent(); +} + static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, MachineIRBuilder MIB) { // Get access to information about available extensions @@ -202,15 +253,6 @@ static void insertBitcasts(MachineFunction &MF, SPIRVGlobalRegistry *GR, } else { GR->assignSPIRVTypeToVReg(AssignedPtrType, Def, MF); MIB.buildBitcast(Def, Source); - // MachineVerifier requires that bitcast must change the type. - // Change AddressSpace if needed to hint that Def and Source points to - // different types: this doesn't change actual code generation. - LLT DefType = MRI->getType(Def); - if (DefType == MRI->getType(Source)) - MRI->setType(Def, - LLT::pointer((DefType.getAddressSpace() + 1) % - SPIRVSubtarget::MaxLegalAddressSpace, - GR->getPointerSize())); } } } @@ -1007,6 +1049,7 @@ bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) { removeImplicitFallthroughs(MF, MIB); insertSpirvDecorations(MF, MIB); insertInlineAsm(MF, GR, ST, MIB); + selectOpBitcasts(MF, GR, MIB); return true; } diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp index 211a060ee103bc..13e05b67927518 100644 --- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp @@ -87,7 +87,7 @@ BasicBlock *getExitFor(const ConvergenceRegion *CR) { // Returns the merge block designated by I if I is a merge instruction, nullptr // otherwise. BasicBlock *getDesignatedMergeBlock(Instruction *I) { - IntrinsicInst *II = dyn_cast(I); + IntrinsicInst *II = dyn_cast_or_null(I); if (II == nullptr) return nullptr; @@ -102,7 +102,7 @@ BasicBlock *getDesignatedMergeBlock(Instruction *I) { // Returns the continue block designated by I if I is an OpLoopMerge, nullptr // otherwise. BasicBlock *getDesignatedContinueBlock(Instruction *I) { - IntrinsicInst *II = dyn_cast(I); + IntrinsicInst *II = dyn_cast_or_null(I); if (II == nullptr) return nullptr; @@ -284,18 +284,6 @@ void replaceBranchTargets(BasicBlock *BB, BasicBlock *OldTarget, assert(false && "Unhandled terminator type."); } -// Replaces basic bloc operands |OldSrc| or OpPhi instructions in |BB| by -// |NewSrc|. This function does not simplify the OpPhi instruction once -// transformed. -void replacePhiTargets(BasicBlock *BB, BasicBlock *OldSrc, BasicBlock *NewSrc) { - for (PHINode &Phi : BB->phis()) { - int index = Phi.getBasicBlockIndex(OldSrc); - if (index == -1) - continue; - Phi.setIncomingBlock(index, NewSrc); - } -} - } // anonymous namespace // Given a reducible CFG, produces a structurized CFG in the SPIR-V sense, @@ -423,7 +411,7 @@ class SPIRVStructurizer : public FunctionPass { } // Splits the given edges by recreating proxy nodes so that the destination - // OpPhi instruction can still be viable. + // has unique incoming edges from this region. // // clang-format off // @@ -436,66 +424,58 @@ class SPIRVStructurizer : public FunctionPass { // A -> D -> C // B -> D -> C // - // But if C had a phi node, adding such proxy-block breaks it. In such case, we must add 1 new block per - // exit, and patchup the phi node: + // This is fine (assuming C has no PHI nodes), but requires handling the merge instruction here. + // By adding a proxy node, we create a regular divergent shape which can easily be regularized later on. // A -> D -> D1 -> C // B -> D -> D2 -> C // - // A, B, D belongs to the construct. D is the exit. D1 and D2 are empty, just used as - // source operands for C's phi node. + // A, B, D belongs to the construct. D is the exit. D1 and D2 are empty. // // clang-format on std::vector createAliasBlocksForComplexEdges(std::vector Edges) { - std::unordered_map Seen; + std::unordered_set Seen; std::vector Output; Output.reserve(Edges.size()); for (auto &[Src, Dst] : Edges) { - auto [iterator, inserted] = Seen.insert({Src, Dst}); - if (inserted) { - Output.emplace_back(Src, Dst); - continue; + auto [Iterator, Inserted] = Seen.insert(Src); + if (!Inserted) { + // Src already a source node. Cannot have 2 edges from A to B. + // Creating alias source block. + BasicBlock *NewSrc = BasicBlock::Create( + F.getContext(), Src->getName() + ".new.src", &F); + replaceBranchTargets(Src, Dst, NewSrc); + IRBuilder<> Builder(NewSrc); + Builder.CreateBr(Dst); + Src = NewSrc; } - // The exact same edge was already seen. Ignoring. - if (iterator->second == Dst) - continue; - - // The same Src block branches to 2 distinct blocks. This will be an - // issue for the generated OpPhi. Creating alias block. - BasicBlock *NewSrc = - BasicBlock::Create(F.getContext(), "new.exit.src", &F); - replaceBranchTargets(Src, Dst, NewSrc); - replacePhiTargets(Dst, Src, NewSrc); - - IRBuilder<> Builder(NewSrc); - Builder.CreateBr(Dst); - - Seen.emplace(NewSrc, Dst); - Output.emplace_back(NewSrc, Dst); + Output.emplace_back(Src, Dst); } return Output; } + AllocaInst *CreateVariable(Function &F, Type *Type, + BasicBlock::iterator Position) { + const DataLayout &DL = F.getDataLayout(); + return new AllocaInst(Type, DL.getAllocaAddrSpace(), nullptr, "reg", + Position); + } + // Given a construct defined by |Header|, and a list of exiting edges // |Edges|, creates a new single exit node, fixing up those edges. BasicBlock *createSingleExitNode(BasicBlock *Header, std::vector &Edges) { - auto NewExit = BasicBlock::Create(F.getContext(), "new.exit", &F); - IRBuilder<> ExitBuilder(NewExit); - std::vector Dsts; - std::unordered_map DstToIndex; - - // Given 2 edges: Src1 -> Dst, Src2 -> Dst: - // If Dst has an PHI node, and Src1 and Src2 are both operands, both Src1 - // and Src2 cannot be hidden by NewExit. Create 2 new nodes: Alias1, - // Alias2 to which NewExit will branch before going to Dst. Then, patchup - // Dst PHI node to look for Alias1 and Alias2. std::vector FixedEdges = createAliasBlocksForComplexEdges(Edges); + std::vector Dsts; + std::unordered_map DstToIndex; + auto NewExit = BasicBlock::Create(F.getContext(), + Header->getName() + ".new.exit", &F); + IRBuilder<> ExitBuilder(NewExit); for (auto &[Src, Dst] : FixedEdges) { if (DstToIndex.count(Dst) != 0) continue; @@ -506,33 +486,34 @@ class SPIRVStructurizer : public FunctionPass { if (Dsts.size() == 1) { for (auto &[Src, Dst] : FixedEdges) { replaceBranchTargets(Src, Dst, NewExit); - replacePhiTargets(Dst, Src, NewExit); } ExitBuilder.CreateBr(Dsts[0]); return NewExit; } - PHINode *PhiNode = - ExitBuilder.CreatePHI(ExitBuilder.getInt32Ty(), FixedEdges.size()); - + AllocaInst *Variable = CreateVariable(F, ExitBuilder.getInt32Ty(), + F.begin()->getFirstInsertionPt()); for (auto &[Src, Dst] : FixedEdges) { - PhiNode->addIncoming(DstToIndex[Dst], Src); + IRBuilder<> B2(Src); + B2.SetInsertPoint(Src->getFirstInsertionPt()); + B2.CreateStore(DstToIndex[Dst], Variable); replaceBranchTargets(Src, Dst, NewExit); - replacePhiTargets(Dst, Src, NewExit); } + llvm::Value *Load = + ExitBuilder.CreateLoad(ExitBuilder.getInt32Ty(), Variable); + // If we can avoid an OpSwitch, generate an OpBranch. Reason is some // OpBranch are allowed to exist without a new OpSelectionMerge if one of // the branch is the parent's merge node, while OpSwitches are not. if (Dsts.size() == 2) { - Value *Condition = ExitBuilder.CreateCmp(CmpInst::ICMP_EQ, - DstToIndex[Dsts[0]], PhiNode); + Value *Condition = + ExitBuilder.CreateCmp(CmpInst::ICMP_EQ, DstToIndex[Dsts[0]], Load); ExitBuilder.CreateCondBr(Condition, Dsts[0], Dsts[1]); return NewExit; } - SwitchInst *Sw = - ExitBuilder.CreateSwitch(PhiNode, Dsts[0], Dsts.size() - 1); + SwitchInst *Sw = ExitBuilder.CreateSwitch(Load, Dsts[0], Dsts.size() - 1); for (auto It = Dsts.begin() + 1; It != Dsts.end(); ++It) { Sw->addCase(DstToIndex[*It], *It); } @@ -576,7 +557,7 @@ class SPIRVStructurizer : public FunctionPass { // Creates a new basic block in F with a single OpUnreachable instruction. BasicBlock *CreateUnreachable(Function &F) { - BasicBlock *BB = BasicBlock::Create(F.getContext(), "new.exit", &F); + BasicBlock *BB = BasicBlock::Create(F.getContext(), "unreachable", &F); IRBuilder<> Builder(BB); Builder.CreateUnreachable(); return BB; @@ -1027,17 +1008,8 @@ class SPIRVStructurizer : public FunctionPass { return Modified; } - bool IsRequiredForPhiNode(BasicBlock *BB) { - for (BasicBlock *Successor : successors(BB)) { - for (PHINode &Phi : Successor->phis()) { - if (Phi.getBasicBlockIndex(BB) != -1) - return true; - } - } - - return false; - } - + // Removes blocks not contributing to any structured CFG. This assumes there + // is no PHI nodes. bool removeUselessBlocks(Function &F) { std::vector ToRemove; @@ -1054,9 +1026,6 @@ class SPIRVStructurizer : public FunctionPass { if (MergeBlocks.count(&BB) != 0 || ContinueBlocks.count(&BB) != 0) continue; - if (IsRequiredForPhiNode(&BB)) - continue; - if (BB.getUniqueSuccessor() == nullptr) continue; @@ -1127,6 +1096,18 @@ class SPIRVStructurizer : public FunctionPass { continue; Modified = true; + + if (Merge == nullptr) { + Merge = *successors(Header).begin(); + IRBuilder<> Builder(Header); + Builder.SetInsertPoint(Header->getTerminator()); + + auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge); + SmallVector Args = {MergeAddress}; + Builder.CreateIntrinsic(Intrinsic::spv_selection_merge, {}, {Args}); + continue; + } + Instruction *SplitInstruction = Merge->getTerminator(); if (isMergeInstruction(SplitInstruction->getPrevNode())) SplitInstruction = SplitInstruction->getPrevNode(); diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 13ad1eb8e8b337..d63438baca7e76 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -355,7 +355,9 @@ defm GeometryPointSize : CapabilityOperand<24, 0, 0, [], [Geometry]>; defm ImageGatherExtended : CapabilityOperand<25, 0, 0, [], [Shader]>; defm StorageImageMultisample : CapabilityOperand<27, 0, 0, [], [Shader]>; defm UniformBufferArrayDynamicIndexing : CapabilityOperand<28, 0, 0, [], [Shader]>; -defm SampledImageArrayDymnamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>; +defm SampledImageArrayDynamicIndexing : CapabilityOperand<29, 0, 0, [], [Shader]>; +defm StorageBufferArrayDynamicIndexing : CapabilityOperand<30, 0, 0, [], [Shader]>; +defm StorageImageArrayDynamicIndexing : CapabilityOperand<31, 0, 0, [], [Shader]>; defm ClipDistance : CapabilityOperand<32, 0, 0, [], [Shader]>; defm CullDistance : CapabilityOperand<33, 0, 0, [], [Shader]>; defm SampleRateShading : CapabilityOperand<35, 0, 0, [], [Shader]>; diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp index e5384b2eb2c2c1..34854f31b3e387 100644 --- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Pass.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar/Reg2Mem.h" #include "llvm/Transforms/Utils.h" #include @@ -169,13 +170,21 @@ void SPIRVPassConfig::addIRPasses() { // - loops have a single back-edge. addPass(createLoopSimplifyPass()); - // 2. Merge the convergence region exit nodes into one. After this step, + // 2. Removes registers whose lifetime spans across basic blocks. Also + // removes phi nodes. This will greatly simplify the next steps. + addPass(createRegToMemWrapperPass()); + + // 3. Merge the convergence region exit nodes into one. After this step, // regions are single-entry, single-exit. This will help determine the // correct merge block. addPass(createSPIRVMergeRegionExitTargetsPass()); - // 3. Structurize. + // 4. Structurize. addPass(createSPIRVStructurizerPass()); + + // 5. Reduce the amount of variables required by pushing some operations + // back to virtual registers. + addPass(createPromoteMemoryToRegisterPass()); } addPass(createSPIRVRegularizerPass()); diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp index dff33b16b9cfcf..f9b361e163c909 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp @@ -460,53 +460,98 @@ PartialOrderingVisitor::getReachableFrom(BasicBlock *Start) { return Output; } -size_t PartialOrderingVisitor::visit(BasicBlock *BB, size_t Rank) { - if (Visited.count(BB) != 0) - return Rank; +bool PartialOrderingVisitor::CanBeVisited(BasicBlock *BB) const { + for (BasicBlock *P : predecessors(BB)) { + // Ignore back-edges. + if (DT.dominates(BB, P)) + continue; - Loop *L = LI.getLoopFor(BB); - const bool isLoopHeader = LI.isLoopHeader(BB); + // One of the predecessor hasn't been visited. Not ready yet. + if (BlockToOrder.count(P) == 0) + return false; - if (BlockToOrder.count(BB) == 0) { - OrderInfo Info = {Rank, Visited.size()}; - BlockToOrder.emplace(BB, Info); - } else { - BlockToOrder[BB].Rank = std::max(BlockToOrder[BB].Rank, Rank); + // If the block is a loop exit, the loop must be finished before + // we can continue. + Loop *L = LI.getLoopFor(P); + if (L == nullptr || L->contains(BB)) + continue; + + // SPIR-V requires a single back-edge. And the backend first + // step transforms loops into the simplified format. If we have + // more than 1 back-edge, something is wrong. + assert(L->getNumBackEdges() <= 1); + + // If the loop has no latch, loop's rank won't matter, so we can + // proceed. + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch); + if (Latch == nullptr) + continue; + + // The latch is not ready yet, let's wait. + if (BlockToOrder.count(Latch) == 0) + return false; } - for (BasicBlock *Predecessor : predecessors(BB)) { - if (isLoopHeader && L->contains(Predecessor)) { + return true; +} + +size_t PartialOrderingVisitor::GetNodeRank(BasicBlock *BB) const { + size_t result = 0; + + for (BasicBlock *P : predecessors(BB)) { + // Ignore back-edges. + if (DT.dominates(BB, P)) continue; - } - if (BlockToOrder.count(Predecessor) == 0) { - return Rank; + auto Iterator = BlockToOrder.end(); + Loop *L = LI.getLoopFor(P); + BasicBlock *Latch = L ? L->getLoopLatch() : nullptr; + + // If the predecessor is either outside a loop, or part of + // the same loop, simply take its rank + 1. + if (L == nullptr || L->contains(BB) || Latch == nullptr) { + Iterator = BlockToOrder.find(P); + } else { + // Otherwise, take the loop's rank (highest rank in the loop) as base. + // Since loops have a single latch, highest rank is easy to find. + // If the loop has no latch, then it doesn't matter. + Iterator = BlockToOrder.find(Latch); } + + assert(Iterator != BlockToOrder.end()); + result = std::max(result, Iterator->second.Rank + 1); } - Visited.insert(BB); + return result; +} + +size_t PartialOrderingVisitor::visit(BasicBlock *BB, size_t Unused) { + ToVisit.push(BB); + Queued.insert(BB); - SmallVector OtherSuccessors; - SmallVector LoopSuccessors; + while (ToVisit.size() != 0) { + BasicBlock *BB = ToVisit.front(); + ToVisit.pop(); - for (BasicBlock *Successor : successors(BB)) { - // Ignoring back-edges. - if (DT.dominates(Successor, BB)) + if (!CanBeVisited(BB)) { + ToVisit.push(BB); continue; + } - if (isLoopHeader && L->contains(Successor)) { - LoopSuccessors.push_back(Successor); - } else - OtherSuccessors.push_back(Successor); - } + size_t Rank = GetNodeRank(BB); + OrderInfo Info = {Rank, BlockToOrder.size()}; + BlockToOrder.emplace(BB, Info); - for (BasicBlock *BB : LoopSuccessors) - Rank = std::max(Rank, visit(BB, Rank + 1)); + for (BasicBlock *S : successors(BB)) { + if (Queued.count(S) != 0) + continue; + ToVisit.push(S); + Queued.insert(S); + } + } - size_t OutputRank = Rank; - for (BasicBlock *Item : OtherSuccessors) - OutputRank = std::max(OutputRank, visit(Item, Rank + 1)); - return OutputRank; + return 0; } PartialOrderingVisitor::PartialOrderingVisitor(Function &F) { diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.h b/llvm/lib/Target/SPIRV/SPIRVUtils.h index 83e717e6ea58fd..11fd3a5c61dcae 100644 --- a/llvm/lib/Target/SPIRV/SPIRVUtils.h +++ b/llvm/lib/Target/SPIRV/SPIRVUtils.h @@ -18,6 +18,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/TypedPointerType.h" +#include #include #include @@ -62,7 +63,9 @@ class SPIRVSubtarget; class PartialOrderingVisitor { DomTreeBuilder::BBDomTree DT; LoopInfo LI; - std::unordered_set Visited = {}; + + std::unordered_set Queued = {}; + std::queue ToVisit = {}; struct OrderInfo { size_t Rank; @@ -80,6 +83,9 @@ class PartialOrderingVisitor { // Visits |BB| with the current rank being |Rank|. size_t visit(BasicBlock *BB, size_t Rank); + size_t GetNodeRank(BasicBlock *BB) const; + bool CanBeVisited(BasicBlock *BB) const; + public: // Build the visitor to operate on the function F. PartialOrderingVisitor(Function &F); diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp index 5b407a8b6f54a0..4bba54463103bc 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp @@ -38,7 +38,7 @@ bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const { return (STI.hasFeature(Sparc::FeatureV9)) != 0; } -void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << '%' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h index 207a970228058d..52321d56211858 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.h @@ -24,7 +24,7 @@ class SparcInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printRegName(raw_ostream &OS, MCRegister Reg, unsigned AltIdx) const; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp index 05113010794e0b..72b7bd60276a7a 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.cpp @@ -19,7 +19,7 @@ using namespace llvm; void SystemZGNUInstPrinter::printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const { + raw_ostream &O) { const char *RegName = getRegisterName(Reg); markup(O, Markup::Register) << '%' << RegName; } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h index 8f62ae0e16c006..7095e325c70bc0 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZGNUInstPrinter.h @@ -38,7 +38,7 @@ class SystemZGNUInstPrinter : public SystemZInstPrinterCommon { private: void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const override; + raw_ostream &O) override; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp index 9abd408324c067..ef9881932f7c08 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.cpp @@ -19,7 +19,7 @@ using namespace llvm; void SystemZHLASMInstPrinter::printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const { + raw_ostream &O) { const char *RegName = getRegisterName(Reg); // Skip register prefix so that only register number is left assert(isalpha(RegName[0]) && isdigit(RegName[1])); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h index 9a69e012c72942..ffccbec36c7491 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMInstPrinter.h @@ -37,7 +37,7 @@ class SystemZHLASMInstPrinter : public SystemZInstPrinterCommon { private: void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const override; + raw_ostream &O) override; }; } // end namespace llvm diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp index 00560ab1f4b18d..fe0f3874765614 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp @@ -57,8 +57,7 @@ void SystemZInstPrinterCommon::printOperand(const MCOperand &MO, llvm_unreachable("Invalid operand"); } -void SystemZInstPrinterCommon::printRegName(raw_ostream &O, - MCRegister Reg) const { +void SystemZInstPrinterCommon::printRegName(raw_ostream &O, MCRegister Reg) { printFormattedRegName(&MAI, Reg, O); } diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h index 9a972824f7ffb5..1a11e421691ae3 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h @@ -36,10 +36,10 @@ class SystemZInstPrinterCommon : public MCInstPrinter { void printOperand(const MCOperand &MO, const MCAsmInfo *MAI, raw_ostream &O); virtual void printFormattedRegName(const MCAsmInfo *MAI, MCRegister Reg, - raw_ostream &O) const {} + raw_ostream &O) {} // Override MCInstPrinter. - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; protected: template diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp index 8261b5aa7b4e13..47455a9a0274c2 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "VEGenAsmWriter.inc" -void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void VEInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { // Generic registers have identical register name among register classes. unsigned AltIdx = VE::AsmName; // Misc registers have each own name, so no use alt-names. diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h index 65660a49c5e4dd..d5e0ebd3596ca8 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h +++ b/llvm/lib/Target/VE/MCTargetDesc/VEInstPrinter.h @@ -24,7 +24,7 @@ class VEInstPrinter : public MCInstPrinter { const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp index 4c29b59b3302e4..026f859b15d715 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp @@ -38,8 +38,7 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCRegisterInfo &MRI) : MCInstPrinter(MAI, MII, MRI) {} -void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, - MCRegister Reg) const { +void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { assert(Reg.id() != WebAssembly::UnusedReg); // Note that there's an implicit local.get/local.set here! OS << "$" << Reg.id(); diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h index b499926ab82965..e7c5e14973b630 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h @@ -35,7 +35,7 @@ class WebAssemblyInstPrinter final : public MCInstPrinter { WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp index 58b4527af6557b..c811d621e60eb7 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp @@ -35,7 +35,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter.inc" -void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void X86ATTInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << '%' << getRegisterName(Reg); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h index 83040c112b6885..7e525e23236229 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h @@ -23,7 +23,7 @@ class X86ATTInstPrinter final : public X86InstPrinterCommon { const MCRegisterInfo &MRI) : X86InstPrinterCommon(MAI, MII, MRI), HasCustomInstComment(false) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp index 587f923e789f02..49e8bab4c0363d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstComments.cpp @@ -40,6 +40,20 @@ using namespace llvm; CASE_MASK_INS_COMMON(Inst, Suffix, src) \ CASE_MASKZ_INS_COMMON(Inst, Suffix, src) +#define CASE_FPCLASS_PACKED(Inst, src) \ + CASE_AVX_INS_COMMON(Inst, Z, r##src) \ + CASE_AVX_INS_COMMON(Inst, Z256, r##src) \ + CASE_AVX_INS_COMMON(Inst, Z128, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) + +#define CASE_FPCLASS_PACKED_MEM(Inst) \ + CASE_FPCLASS_PACKED(Inst, m) \ + CASE_FPCLASS_PACKED(Inst, mb) + +#define CASE_FPCLASS_SCALAR(Inst, src) \ + CASE_AVX_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) + #define CASE_PTERNLOG(Inst, src) \ CASE_AVX512_INS_COMMON(Inst, Z, r##src##i) \ CASE_AVX512_INS_COMMON(Inst, Z256, r##src##i) \ @@ -949,6 +963,70 @@ static bool printPTERNLOGComments(const MCInst *MI, raw_ostream &OS, return true; } +static bool printFPCLASSComments(const MCInst *MI, raw_ostream &OS, + const MCInstrInfo &MCII) { + unsigned NumOperands = MI->getNumOperands(); + int SrcIdx; + switch (MI->getOpcode()) { + CASE_FPCLASS_PACKED(FPCLASSPBF16, r) + CASE_FPCLASS_PACKED(FPCLASSPH, r) + CASE_FPCLASS_PACKED(FPCLASSPS, r) + CASE_FPCLASS_PACKED(FPCLASSPD, r) + CASE_FPCLASS_SCALAR(FPCLASSSH, r) + CASE_FPCLASS_SCALAR(FPCLASSSS, r) + CASE_FPCLASS_SCALAR(FPCLASSSD, r) { + SrcIdx = NumOperands - 2; + break; + } + CASE_FPCLASS_PACKED_MEM(FPCLASSPBF16) + CASE_FPCLASS_PACKED_MEM(FPCLASSPH) + CASE_FPCLASS_PACKED_MEM(FPCLASSPS) + CASE_FPCLASS_PACKED_MEM(FPCLASSPD) + CASE_FPCLASS_SCALAR(FPCLASSSH, m) + CASE_FPCLASS_SCALAR(FPCLASSSS, m) + CASE_FPCLASS_SCALAR(FPCLASSSD, m) { + SrcIdx = -1; + break; + } + default: + return false; + } + StringRef DestName = getRegName(MI->getOperand(0).getReg()); + StringRef SrcName = + SrcIdx != -1 ? getRegName(MI->getOperand(SrcIdx).getReg()) : "mem"; + + OS << DestName; + printMasking(OS, MI, MCII); + OS << " = "; + + uint8_t Categories = MI->getOperand(NumOperands - 1).getImm(); + if (Categories == 0) { + OS << "false"; + } else { + static constexpr StringLiteral CategoryNames[] = { + "QuietNaN", + "PositiveZero", + "NegativeZero", + "PositiveInfinity", + "NegativeInfinity", + "Subnormal", + "Negative", + "SignalingNaN", + }; + bool Conjoin = false; + for (size_t I = 0, E = std::size(CategoryNames); I != E; ++I) { + if (Categories & (1 << I)) { + if (Conjoin) + OS << " | "; + Conjoin = true; + OS << "is" << CategoryNames[I] << '(' << SrcName << ')'; + } + } + } + OS << '\n'; + return true; +} + //===----------------------------------------------------------------------===// // Top Level Entrypoint //===----------------------------------------------------------------------===// @@ -970,6 +1048,9 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, if (printPTERNLOGComments(MI, OS, MCII)) return true; + if (printFPCLASSComments(MI, OS, MCII)) + return true; + switch (MI->getOpcode()) { default: // Not an instruction for which we can decode comments. diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp index cd8b9aa6257300..8e7dae229275bb 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp @@ -33,7 +33,7 @@ using namespace llvm; #define PRINT_ALIAS_INSTR #include "X86GenAsmWriter1.inc" -void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void X86IntelInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { markup(OS, Markup::Register) << getRegisterName(Reg); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h index a34c06782f4042..988ab9626c3fd7 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.h @@ -24,7 +24,7 @@ class X86IntelInstPrinter final : public X86InstPrinterCommon { const MCRegisterInfo &MRI) : X86InstPrinterCommon(MAI, MII, MRI) {} - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &OS) override; bool printVecCompareInstr(const MCInst *MI, raw_ostream &OS); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1c790f3813b7a4..34bc5d76c15cea 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2440,6 +2440,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::v32bf16, Legal); setOperationAction(ISD::SETCC, MVT::v32bf16, Custom); } + for (auto VT : {MVT::f16, MVT::f32, MVT::f64}) { + setCondCodeAction(ISD::SETOEQ, VT, Custom); + setCondCodeAction(ISD::SETUNE, VT, Custom); + } } if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { @@ -24072,6 +24076,13 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; } + if (Subtarget.hasAVX10_2()) { + if (CC == ISD::SETOEQ || CC == ISD::SETUNE) { + auto NewCC = (CC == ISD::SETOEQ) ? X86::COND_E : (X86::COND_NE); + return getSETCC(NewCC, DAG.getNode(X86ISD::UCOMX, dl, MVT::i32, Op0, Op1), + dl, DAG); + } + } // Handle floating point. X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP*/ true, Op0, Op1, DAG); if (CondCode == X86::COND_INVALID) diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td index 4d64eb776e09ce..0301c07dfb540b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX10.td +++ b/llvm/lib/Target/X86/X86InstrAVX10.td @@ -1541,6 +1541,24 @@ defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_ //------------------------------------------------- // AVX10 COMEF instructions //------------------------------------------------- +multiclass avx10_com_ef Opc, RegisterClass RC, ValueType VT, + SDPatternOperator OpNode, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + Domain d, X86FoldableSchedWrite sched = WriteFComX>{ + let ExeDomain = d, mayRaiseFPException = 1, isCodeGenOnly = 1 in { + def rr : AVX512, + EVEX, EVEX_V128, Sched<[sched]>, SIMD_EXC; + let mayLoad = 1 in { + def rm : AVX512, + EVEX, EVEX_V128, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC; + } + } +} + multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, string OpcodeStr, Domain d, @@ -1564,6 +1582,15 @@ multiclass avx10_com_ef_int Opc, X86VectorVTInfo _, SDNode OpNode, } let Defs = [EFLAGS], Uses = [MXCSR], Predicates = [HasAVX10_2] in { + defm VUCOMXSDZ : avx10_com_ef<0x2e, FR64X, f64, X86ucomi512, + "vucomxsd", f64mem, loadf64, SSEPackedDouble>, + TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; + defm VUCOMXSHZ : avx10_com_ef<0x2e, FR16X, f16, X86ucomi512, + "vucomxsh", f16mem, loadf16, SSEPackedSingle>, + T_MAP5, XD, EVEX_CD8<16, CD8VT1>; + defm VUCOMXSSZ : avx10_com_ef<0x2e, FR32X, f32, X86ucomi512, + "vucomxss", f32mem, loadf32, SSEPackedSingle>, + TB, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm VCOMXSDZ : avx10_com_ef_int<0x2f, v2f64x_info, X86comi512, "vcomxsd", SSEPackedDouble>, TB, XS, VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index bae223243b3dc9..520284d1d7a488 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (auto KindCost = Entry->Cost[CostKind]) return *KindCost; } + + if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) || + (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) { + // fp16 conversions not covered by any table entries require a libcall. + // Return a large (arbitrary) number to model this. + return InstructionCost(64); + } } // Fall back to legalized types. @@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, TTI::CastContextHint::None, CostKind); } - if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) { - // Conversion requires a libcall. - return InstructionCost::getInvalid(); - } - // TODO: Allow non-throughput costs that aren't binary. auto AdjustCost = [&CostKind](InstructionCost Cost, InstructionCost N = 1) -> InstructionCost { diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp index eda90d3101ab48..707c4a79087280 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.cpp @@ -27,7 +27,7 @@ using namespace llvm; #include "XCoreGenAsmWriter.inc" -void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const { +void XCoreInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) { OS << StringRef(getRegisterName(Reg)).lower(); } diff --git a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h index 916ca99968fbb0..2b47de457322ee 100644 --- a/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h +++ b/llvm/lib/Target/XCore/MCTargetDesc/XCoreInstPrinter.h @@ -31,7 +31,7 @@ class XCoreInstPrinter : public MCInstPrinter { void printInstruction(const MCInst *MI, uint64_t Address, raw_ostream &O); static const char *getRegisterName(MCRegister Reg); - void printRegName(raw_ostream &OS, MCRegister Reg) const override; + void printRegName(raw_ostream &OS, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp index fe1dc0e2e483e7..e04d7bd211216f 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.cpp @@ -74,7 +74,7 @@ void XtensaInstPrinter::printInst(const MCInst *MI, uint64_t Address, printAnnotation(O, Annot); } -void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const { +void XtensaInstPrinter::printRegName(raw_ostream &O, MCRegister Reg) { O << getRegisterName(Reg); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h index 46a35ae6f4e3fa..4122b1ff2310b7 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaInstPrinter.h @@ -36,7 +36,7 @@ class XtensaInstPrinter : public MCInstPrinter { static void printOperand(const MCOperand &MO, raw_ostream &O); // Override MCInstPrinter. - void printRegName(raw_ostream &O, MCRegister Reg) const override; + void printRegName(raw_ostream &O, MCRegister Reg) override; void printInst(const MCInst *MI, uint64_t Address, StringRef Annot, const MCSubtargetInfo &STI, raw_ostream &O) override; diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index caa5a97747ee57..de5b5c39c9ed27 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -751,17 +751,6 @@ Error RISCVISAInfo::checkDependency() { if (HasZvl && !HasVector) return getExtensionRequiresError("zvl*b", "v' or 'zve*"); - if (!HasVector) - for (auto Ext : - {"zvbb", "zvbc32e", "zvkb", "zvkg", "zvkgs", "zvkned", "zvknha", "zvksed", "zvksh"}) - if (Exts.count(Ext)) - return getExtensionRequiresError(Ext, "v' or 'zve*"); - - if (!Exts.count("zve64x")) - for (auto Ext : {"zvknhb", "zvbc"}) - if (Exts.count(Ext)) - return getExtensionRequiresError(Ext, "v' or 'zve64*"); - if ((HasZcmt || Exts.count("zcmp")) && HasD && (HasC || Exts.count("zcd"))) return getError(Twine("'") + (HasZcmt ? "zcmt" : "zcmp") + "' extension is incompatible with '" + diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 0395ee62ae988b..070df429bfc265 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -60,6 +60,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/TimeProfiler.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Coroutines/ABI.h" #include "llvm/Transforms/Coroutines/CoroInstr.h" @@ -118,7 +119,6 @@ class CoroCloner { TargetTransformInfo &TTI; -public: /// Create a cloner for a switch lowering. CoroCloner(Function &OrigF, const Twine &Suffix, coro::Shape &Shape, Kind FKind, TargetTransformInfo &TTI) @@ -140,6 +140,30 @@ class CoroCloner { assert(ActiveSuspend && "need active suspend point for continuation"); } +public: + /// Create a clone for a switch lowering. + static Function *createClone(Function &OrigF, const Twine &Suffix, + coro::Shape &Shape, Kind FKind, + TargetTransformInfo &TTI) { + TimeTraceScope FunctionScope("CoroCloner"); + + CoroCloner Cloner(OrigF, Suffix, Shape, FKind, TTI); + Cloner.create(); + return Cloner.getFunction(); + } + + /// Create a clone for a continuation lowering. + static Function *createClone(Function &OrigF, const Twine &Suffix, + coro::Shape &Shape, Function *NewF, + AnyCoroSuspendInst *ActiveSuspend, + TargetTransformInfo &TTI) { + TimeTraceScope FunctionScope("CoroCloner"); + + CoroCloner Cloner(OrigF, Suffix, Shape, NewF, ActiveSuspend, TTI); + Cloner.create(); + return Cloner.getFunction(); + } + Function *getFunction() const { assert(NewF != nullptr && "declaration not yet set"); return NewF; @@ -1466,13 +1490,16 @@ struct SwitchCoroutineSplitter { TargetTransformInfo &TTI) { assert(Shape.ABI == coro::ABI::Switch); + // Create a resume clone by cloning the body of the original function, + // setting new entry block and replacing coro.suspend an appropriate value + // to force resume or cleanup pass for every suspend point. createResumeEntryBlock(F, Shape); - auto *ResumeClone = - createClone(F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI); - auto *DestroyClone = - createClone(F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI); - auto *CleanupClone = - createClone(F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI); + auto *ResumeClone = CoroCloner::createClone( + F, ".resume", Shape, CoroCloner::Kind::SwitchResume, TTI); + auto *DestroyClone = CoroCloner::createClone( + F, ".destroy", Shape, CoroCloner::Kind::SwitchUnwind, TTI); + auto *CleanupClone = CoroCloner::createClone( + F, ".cleanup", Shape, CoroCloner::Kind::SwitchCleanup, TTI); postSplitCleanup(*ResumeClone); postSplitCleanup(*DestroyClone); @@ -1562,17 +1589,6 @@ struct SwitchCoroutineSplitter { } private: - // Create a resume clone by cloning the body of the original function, setting - // new entry block and replacing coro.suspend an appropriate value to force - // resume or cleanup pass for every suspend point. - static Function *createClone(Function &F, const Twine &Suffix, - coro::Shape &Shape, CoroCloner::Kind FKind, - TargetTransformInfo &TTI) { - CoroCloner Cloner(F, Suffix, Shape, FKind, TTI); - Cloner.create(); - return Cloner.getFunction(); - } - // Create an entry block for a resume function with a switch that will jump to // suspend points. static void createResumeEntryBlock(Function &F, coro::Shape &Shape) { @@ -1872,7 +1888,8 @@ void coro::AsyncABI::splitCoroutine(Function &F, coro::Shape &Shape, auto *Suspend = Shape.CoroSuspends[Idx]; auto *Clone = Clones[Idx]; - CoroCloner(F, "resume." + Twine(Idx), Shape, Clone, Suspend, TTI).create(); + CoroCloner::createClone(F, "resume." + Twine(Idx), Shape, Clone, Suspend, + TTI); } } @@ -2001,7 +2018,8 @@ void coro::AnyRetconABI::splitCoroutine(Function &F, coro::Shape &Shape, auto Suspend = Shape.CoroSuspends[i]; auto Clone = Clones[i]; - CoroCloner(F, "resume." + Twine(i), Shape, Clone, Suspend, TTI).create(); + CoroCloner::createClone(F, "resume." + Twine(i), Shape, Clone, Suspend, + TTI); } } diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index 5dc82a8dfb2dbe..a2d38717f38d14 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -87,28 +87,14 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, return NewBB; } -// Clone OldFunc into NewFunc, transforming the old arguments into references to -// VMap values. -// -void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - ValueToValueMapTy &VMap, - CloneFunctionChangeType Changes, - SmallVectorImpl &Returns, - const char *NameSuffix, ClonedCodeInfo *CodeInfo, - ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat); - assert(NameSuffix && "NameSuffix cannot be null!"); - -#ifndef NDEBUG - for (const Argument &I : OldFunc->args()) - assert(VMap.count(&I) && "No mapping from source argument specified!"); -#endif - - bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly; - - // Copy all attributes other than those stored in the AttributeList. We need - // to remap the parameter indices of the AttributeList. +void llvm::CloneFunctionAttributesInto(Function *NewFunc, + const Function *OldFunc, + ValueToValueMapTy &VMap, + bool ModuleLevelChanges, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + // Copy all attributes other than those stored in Function's AttributeList + // which holds e.g. parameters and return value attributes. AttributeList NewAttrs = NewFunc->getAttributes(); NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); @@ -140,6 +126,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Clone any argument attributes that are present in the VMap. for (const Argument &OldArg : OldFunc->args()) { if (Argument *NewArg = dyn_cast(VMap[&OldArg])) { + // Remap the parameter indices. NewArgAttrs[NewArg->getArgNo()] = OldAttrs.getParamAttrs(OldArg.getArgNo()); } @@ -148,6 +135,29 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, NewFunc->setAttributes( AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(), NewArgAttrs)); +} + +// Clone OldFunc into NewFunc, transforming the old arguments into references to +// VMap values. +void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, + ValueToValueMapTy &VMap, + CloneFunctionChangeType Changes, + SmallVectorImpl &Returns, + const char *NameSuffix, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + NewFunc->setIsNewDbgInfoFormat(OldFunc->IsNewDbgInfoFormat); + assert(NameSuffix && "NameSuffix cannot be null!"); + +#ifndef NDEBUG + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); +#endif + + bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly; + + CloneFunctionAttributesInto(NewFunc, OldFunc, VMap, ModuleLevelChanges, + TypeMapper, Materializer); // Everything else beyond this point deals with function instructions, // so if we are dealing with a function declaration, we're done. diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 4ad426285ce2f0..a27cb4dd219c30 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -181,9 +181,21 @@ namespace { } } }; - } // end anonymous namespace +static IntrinsicInst *getConvergenceEntry(BasicBlock &BB) { + auto *I = BB.getFirstNonPHI(); + while (I) { + if (auto *IntrinsicCall = dyn_cast(I)) { + if (IntrinsicCall->isEntry()) { + return IntrinsicCall; + } + } + I = I->getNextNode(); + } + return nullptr; +} + /// Get or create a target for the branch from ResumeInsts. BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; @@ -2496,15 +2508,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // fully implements convergence control tokens, there is no mixing of // controlled and uncontrolled convergent operations in the whole program. if (CB.isConvergent()) { - auto *I = CalledFunc->getEntryBlock().getFirstNonPHI(); - if (auto *IntrinsicCall = dyn_cast(I)) { - if (IntrinsicCall->getIntrinsicID() == - Intrinsic::experimental_convergence_entry) { - if (!ConvergenceControlToken) { - return InlineResult::failure( - "convergent call needs convergencectrl operand"); - } - } + if (!ConvergenceControlToken && + getConvergenceEntry(CalledFunc->getEntryBlock())) { + return InlineResult::failure( + "convergent call needs convergencectrl operand"); } } @@ -2795,13 +2802,10 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } if (ConvergenceControlToken) { - auto *I = FirstNewBlock->getFirstNonPHI(); - if (auto *IntrinsicCall = dyn_cast(I)) { - if (IntrinsicCall->getIntrinsicID() == - Intrinsic::experimental_convergence_entry) { - IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); - IntrinsicCall->eraseFromParent(); - } + IntrinsicInst *IntrinsicCall = getConvergenceEntry(*FirstNewBlock); + if (IntrinsicCall) { + IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken); + IntrinsicCall->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 778d928252e051..3d638e52328b57 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -467,11 +467,12 @@ class InnerLoopVectorizer { ElementCount MinProfitableTripCount, unsigned UnrollFactor, LoopVectorizationLegality *LVL, LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks) + ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks, + VPlan &Plan) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Legal(LVL), Cost(CM), BFI(BFI), - PSI(PSI), RTChecks(RTChecks) { + PSI(PSI), RTChecks(RTChecks), Plan(Plan) { // Query this against the original loop and save it here because the profile // of the original loop header may change as the transformation happens. OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize( @@ -498,7 +499,7 @@ class InnerLoopVectorizer { createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs); /// Fix the vectorized code, taking care of header phi's, live-outs, and more. - void fixVectorizedLoop(VPTransformState &State, VPlan &Plan); + void fixVectorizedLoop(VPTransformState &State); // Return true if any runtime check is added. bool areSafetyChecksAdded() { return AddedSafetyChecks; } @@ -513,7 +514,7 @@ class InnerLoopVectorizer { VPTransformState &State); /// Fix the non-induction PHIs in \p Plan. - void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State); + void fixNonInductionPHIs(VPTransformState &State); /// Create a new phi node for the induction variable \p OrigPhi to resume /// iteration count in the scalar epilogue, from where the vectorized loop @@ -541,8 +542,7 @@ class InnerLoopVectorizer { /// Set up the values of the IVs correctly when exiting the vector loop. virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, - VPTransformState &State); + BasicBlock *MiddleBlock, VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -581,8 +581,8 @@ class InnerLoopVectorizer { /// Allow subclasses to override and print debug traces before/after vplan /// execution, when trace information is requested. - virtual void printDebugTracesAtStart(){}; - virtual void printDebugTracesAtEnd(){}; + virtual void printDebugTracesAtStart() {} + virtual void printDebugTracesAtEnd() {} /// The original loop. Loop *OrigLoop; @@ -674,6 +674,8 @@ class InnerLoopVectorizer { /// Structure to hold information about generated runtime checks, responsible /// for cleaning the checks, if vectorization turns out unprofitable. GeneratedRTChecks &RTChecks; + + VPlan &Plan; }; /// Encapsulate information regarding vectorization of a loop and its epilogue. @@ -715,10 +717,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Checks) + GeneratedRTChecks &Checks, VPlan &Plan) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF, LVL, - CM, BFI, PSI, Checks), + CM, BFI, PSI, Checks, Plan), EPI(EPI) {} // Override this function to handle the more complex control flow around the @@ -755,9 +757,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Check) + GeneratedRTChecks &Check, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, LVL, CM, BFI, PSI, Check) {} + EPI, LVL, CM, BFI, PSI, Check, Plan) {} /// Implements the interface for creating a vectorized skeleton using the /// *main loop* strategy (ie the first pass of vplan execution). std::pair @@ -773,7 +775,7 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer { void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, + BasicBlock *MiddleBlock, VPTransformState &State) override {}; }; @@ -789,9 +791,9 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer { OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI, LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, - GeneratedRTChecks &Checks) + GeneratedRTChecks &Checks, VPlan &Plan) : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, LVL, CM, BFI, PSI, Checks) { + EPI, LVL, CM, BFI, PSI, Checks, Plan) { TripCount = EPI.TripCount; } /// Implements the interface for creating a vectorized skeleton using the @@ -1310,7 +1312,7 @@ class LoopVectorizationCostModel { return false; case cl::BOU_FALSE: return true; - }; + } llvm_unreachable("impossible case value"); } @@ -2751,7 +2753,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton( void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, Value *VectorTripCount, Value *EndValue, - BasicBlock *MiddleBlock, VPlan &Plan, + BasicBlock *MiddleBlock, VPTransformState &State) { // There are two kinds of external IV usages - those that use the value // computed in the last iteration (the PHI) and those that use the penultimate @@ -2931,11 +2933,10 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, TargetTransformInfo::TCK_RecipThroughput); } -void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, - VPlan &Plan) { +void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { // Fix widened non-induction PHIs by setting up the PHI operands. if (EnableVPlanNativePath) - fixNonInductionPHIs(Plan, State); + fixNonInductionPHIs(State); // Forget the original basic block. PSE.getSE()->forgetLoop(OrigLoop); @@ -2966,7 +2967,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (const auto &Entry : Legal->getInductionVars()) fixupIVUsers(Entry.first, Entry.second, getOrCreateVectorTripCount(nullptr), - IVEndValues[Entry.first], LoopMiddleBlock, Plan, State); + IVEndValues[Entry.first], LoopMiddleBlock, State); } // Fix live-out phis not already fixed earlier. @@ -3077,8 +3078,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { } while (Changed); } -void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan, - VPTransformState &State) { +void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) { auto Iter = vp_depth_first_deep(Plan.getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { for (VPRecipeBase &P : VPBB->phis()) { @@ -7744,7 +7744,7 @@ DenseMap LoopVectorizationPlanner::executePlan( // 3. Fix the vectorized code: take care of header phi's, live-outs, // predication, updating analyses. - ILV.fixVectorizedLoop(State, BestVPlan); + ILV.fixVectorizedLoop(State); ILV.printDebugTracesAtEnd(); @@ -9065,7 +9065,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) { if (!getDecisionAndClampRange(ApplyIG, Range)) continue; InterleaveGroups.insert(IG); - }; + } // --------------------------------------------------------------------------- // Construct recipes for the instructions in the loop @@ -9727,7 +9727,7 @@ static bool processLoopInVPlanNativePath( GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), AddBranchWeights); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, - VF.Width, 1, LVL, &CM, BFI, PSI, Checks); + VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false); @@ -10215,11 +10215,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. + VPlan &BestPlan = LVP.getPlanFor(VF.Width); InnerLoopVectorizer Unroller( L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1), - ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks); + ElementCount::getFixed(1), IC, &LVL, &CM, BFI, PSI, Checks, BestPlan); - VPlan &BestPlan = LVP.getPlanFor(VF.Width); LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false); ORE->emit([&]() { @@ -10236,15 +10236,16 @@ bool LoopVectorizePass::processLoop(Loop *L) { VectorizationFactor EpilogueVF = LVP.selectEpilogueVectorizationFactor(VF.Width, IC); if (EpilogueVF.Width.isVector()) { + std::unique_ptr BestMainPlan(BestPlan.duplicate()); // The first pass vectorizes the main loop and creates a scalar epilogue // to be vectorized by executing the plan (potentially with a different // factor) again shortly afterwards. EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1); EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, - EPI, &LVL, &CM, BFI, PSI, Checks); + EPI, &LVL, &CM, BFI, PSI, Checks, + *BestMainPlan); - std::unique_ptr BestMainPlan(BestPlan.duplicate()); auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV, DT, false); ++LoopsVectorized; @@ -10253,11 +10254,11 @@ bool LoopVectorizePass::processLoop(Loop *L) { // edges from the first pass. EPI.MainLoopVF = EPI.EpilogueVF; EPI.MainLoopUF = EPI.EpilogueUF; + VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF); EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, - Checks); + Checks, BestEpiPlan); - VPlan &BestEpiPlan = LVP.getPlanFor(EPI.EpilogueVF); VPRegionBlock *VectorLoop = BestEpiPlan.getVectorLoopRegion(); VPBasicBlock *Header = VectorLoop->getEntryBasicBlock(); Header->setName("vec.epilog.vector.body"); @@ -10340,7 +10341,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, - PSI, Checks); + PSI, Checks, BestPlan); LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp index 346d8a90589f55..1cc6356300e492 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Legality.cpp @@ -55,6 +55,21 @@ LegalityAnalysis::notVectorizableBasedOnOpcodesAndTypes( return ResultReason::DiffMathFlags; } + // TODO: Allow vectorization by using common flags. + // For now Pack if they don't have the same wrap flags. + bool CanHaveWrapFlags = + isa(I0) || isa(I0); + if (CanHaveWrapFlags) { + bool NUW0 = I0->hasNoUnsignedWrap(); + bool NSW0 = I0->hasNoSignedWrap(); + if (any_of(drop_begin(Bndl), [NUW0, NSW0](auto *V) { + return cast(V)->hasNoUnsignedWrap() != NUW0 || + cast(V)->hasNoSignedWrap() != NSW0; + })) { + return ResultReason::DiffWrapFlags; + } + } + // TODO: Missing checks return std::nullopt; diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index a34e34a0d71f1e..0e0c64f6df9cba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1688,13 +1688,18 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef CallArguments, Type *Ty, - bool MayReadFromMemory, bool MayWriteToMemory, - bool MayHaveSideEffects, DebugLoc DL = {}) + DebugLoc DL = {}) : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments), - VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), - MayReadFromMemory(MayReadFromMemory), - MayWriteToMemory(MayWriteToMemory), - MayHaveSideEffects(MayHaveSideEffects) {} + VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) { + LLVMContext &Ctx = Ty->getContext(); + AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID); + MemoryEffects ME = Attrs.getMemoryEffects(); + MayReadFromMemory = ME.onlyWritesMemory(); + MayWriteToMemory = ME.onlyReadsMemory(); + MayHaveSideEffects = MayWriteToMemory || + !Attrs.hasFnAttr(Attribute::NoUnwind) || + !Attrs.hasFnAttr(Attribute::WillReturn); + } ~VPWidenIntrinsicRecipe() override = default; @@ -2050,6 +2055,10 @@ class VPHeaderPHIRecipe : public VPSingleDefRecipe { /// Generate the phi nodes. void execute(VPTransformState &State) override = 0; + /// Return the cost of this header phi recipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, @@ -2295,6 +2304,10 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe { void execute(VPTransformState &State) override; + /// Return the cost of this first-order recurrence phi recipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override; + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Print the recipe. void print(raw_ostream &O, const Twine &Indent, @@ -3134,6 +3147,13 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe { /// canonical, i.e. has the same start and step (of 1) as the canonical IV. bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const; + + /// Return the cost of this VPCanonicalIVPHIRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // For now, match the behavior of the legacy cost model. + return 0; + } }; /// A recipe for generating the active lane mask for the vector loop that is @@ -3196,6 +3216,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. void execute(VPTransformState &State) override; + /// Return the cost of this VPEVLBasedIVPHIRecipe. + InstructionCost computeCost(ElementCount VF, + VPCostContext &Ctx) const override { + // For now, match the behavior of the legacy cost model. + return 0; + } + /// Returns true if the recipe only uses the first lane of operand \p Op. bool onlyFirstLaneUsed(const VPValue *Op) const override { assert(is_contained(operands(), Op) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b1e6086398c4df..de7023167df899 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1589,6 +1589,11 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent, } #endif +InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + return Ctx.TTI.getCFInstrCost(Instruction::PHI, TTI::TCK_RecipThroughput); +} + /// This function adds /// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...) /// to each vector element of Val. The sequence starts at StartIndex. @@ -3334,6 +3339,23 @@ void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { State.set(this, Phi); } +InstructionCost +VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + if (VF.isScalable() && VF.getKnownMinValue() == 1) + return InstructionCost::getInvalid(); + + SmallVector Mask(VF.getKnownMinValue()); + std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1); + Type *VectorTy = + ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF); + + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Splice, + cast(VectorTy), Mask, CostKind, + VF.getKnownMinValue() - 1); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 03c4110761ac6a..355781f955052e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1489,7 +1489,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { Ops.push_back(&EVL); return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops, TypeInfo.inferScalarType(Sel), - false, false, false); + Sel->getDebugLoc()); }) .Default([&](VPRecipeBase *R) { return nullptr; }); diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index c6826760a45bee..b0964983550814 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -425,15 +425,15 @@ define void @rint_fp16() { define void @lrint() { ; CHECK-LABEL: 'lrint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.bf16(bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.lrint.i64.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> undef) @@ -490,15 +490,15 @@ define void @lrint() { define void @lrint_fp16() { ; CHECK-LABEL: 'lrint_fp16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.lrint.i64.f16(half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.lrint.nxv1i64.nxv1f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.lrint.nxv2i64.nxv2f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.lrint.nxv4i64.nxv4f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.lrint.nxv8i64.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.lrint.nxv16i64.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i64 @llvm.lrint.f16(half undef) @@ -517,15 +517,15 @@ define void @lrint_fp16() { define void @llrint() { ; CHECK-LABEL: 'llrint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.bf16(bfloat undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8bf16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2bf16(<2 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4bf16(<4 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8bf16(<8 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16bf16(<16 x bfloat> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8bf16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16bf16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call i64 @llvm.llrint.i64.f32(float undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> undef) @@ -582,15 +582,15 @@ define void @llrint() { define void @llrint_fp16() { ; CHECK-LABEL: 'llrint_fp16' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call i64 @llvm.llrint.i64.f16(half undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f16( undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %2 = call <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %3 = call <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %4 = call <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %5 = call <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %6 = call @llvm.llrint.nxv1i64.nxv1f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %7 = call @llvm.llrint.nxv2i64.nxv2f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %8 = call @llvm.llrint.nxv4i64.nxv4f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %9 = call @llvm.llrint.nxv8i64.nxv8f16( undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %10 = call @llvm.llrint.nxv16i64.nxv16f16( undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i64 @llvm.llrint.f16(half undef) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll index 588d852d7f26e2..196e7376677a54 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fadd.ll @@ -13,6 +13,12 @@ define void @reduce_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_bfloat' @@ -24,6 +30,12 @@ define void @reduce_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -34,6 +46,12 @@ define void @reduce_fadd_bfloat() { %v32 = call fast bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call fast bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call fast bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call fast bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call fast bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call fast bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call fast bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call fast bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call fast bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, undef) ret void } @@ -47,6 +65,12 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fadd_half' @@ -58,6 +82,12 @@ define void @reduce_fadd_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_half' @@ -69,6 +99,12 @@ define void @reduce_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -79,6 +115,12 @@ define void @reduce_fadd_half() { %v32 = call fast half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call fast half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call fast half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call fast half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + %NXV2 = call fast half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + %NXV4 = call fast half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + %NXV8 = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + %NXV16 = call fast half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + %NXV32 = call fast half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) ret void } @@ -92,6 +134,11 @@ define void @reduce_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_float' @@ -103,6 +150,11 @@ define void @reduce_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -113,6 +165,11 @@ define void @reduce_fadd_float() { %v32 = call fast float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call fast float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call fast float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + %NXV2 = call fast float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + %NXV4 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + %NXV8 = call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + %NXV16 = call fast float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) ret void } @@ -126,6 +183,10 @@ define void @reduce_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fadd_double' @@ -137,6 +198,10 @@ define void @reduce_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -147,11 +212,15 @@ define void @reduce_fadd_double() { %v32 = call fast double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call fast double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call fast double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call fast double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, undef) + %NXV2 = call fast double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) + %NXV4 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) + %NXV8 = call fast double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) ret void } -define void @reduce_oredered_fadd_bfloat() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_bfloat' +define void @reduce_ordered_fadd_bfloat() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_bfloat' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) @@ -160,9 +229,15 @@ define void @reduce_oredered_fadd_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_bfloat' +; SIZE-LABEL: 'reduce_ordered_fadd_bfloat' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0xR0000, <1 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call bfloat @llvm.vector.reduce.fadd.v2bf16(bfloat 0xR0000, <2 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call bfloat @llvm.vector.reduce.fadd.v4bf16(bfloat 0xR0000, <4 x bfloat> undef) @@ -171,6 +246,12 @@ define void @reduce_oredered_fadd_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fadd.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -181,11 +262,17 @@ define void @reduce_oredered_fadd_bfloat() { %v32 = call bfloat @llvm.vector.reduce.fadd.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call bfloat @llvm.vector.reduce.fadd.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call bfloat @llvm.vector.reduce.fadd.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call bfloat @llvm.vector.reduce.fadd.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call bfloat @llvm.vector.reduce.fadd.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call bfloat @llvm.vector.reduce.fadd.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call bfloat @llvm.vector.reduce.fadd.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call bfloat @llvm.vector.reduce.fadd.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call bfloat @llvm.vector.reduce.fadd.nxv32bf16(bfloat 0.0, undef) ret void } -define void @reduce_oredered_fadd_half() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_half' +define void @reduce_ordered_fadd_half() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_half' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) @@ -194,9 +281,15 @@ define void @reduce_oredered_fadd_half() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_half' +; SIZE-LABEL: 'reduce_ordered_fadd_half' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0xH0000, <1 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call half @llvm.vector.reduce.fadd.v2f16(half 0xH0000, <2 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> undef) @@ -205,6 +298,12 @@ define void @reduce_oredered_fadd_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fadd.v1f16(half 0.0, <1 x half> undef) @@ -215,11 +314,17 @@ define void @reduce_oredered_fadd_half() { %v32 = call half @llvm.vector.reduce.fadd.v32f16(half 0.0, <32 x half> undef) %V64 = call half @llvm.vector.reduce.fadd.v64f16(half 0.0, <64 x half> undef) %V128 = call half @llvm.vector.reduce.fadd.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call half @llvm.vector.reduce.fadd.nxv1f16(half 0.0, undef) + %NXV2 = call half @llvm.vector.reduce.fadd.nxv2f16(half 0.0, undef) + %NXV4 = call half @llvm.vector.reduce.fadd.nxv4f16(half 0.0, undef) + %NXV8 = call half @llvm.vector.reduce.fadd.nxv8f16(half 0.0, undef) + %NXV16 = call half @llvm.vector.reduce.fadd.nxv16f16(half 0.0, undef) + %NXV32 = call half @llvm.vector.reduce.fadd.nxv32f16(half 0.0, undef) ret void } -define void @reduce_oredered_fadd_float() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_float' +define void @reduce_ordered_fadd_float() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_float' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) @@ -228,9 +333,14 @@ define void @reduce_oredered_fadd_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_float' +; SIZE-LABEL: 'reduce_ordered_fadd_float' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.000000e+00, <1 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float 0.000000e+00, <2 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef) @@ -239,6 +349,11 @@ define void @reduce_oredered_fadd_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fadd.v1f32(float 0.0, <1 x float> undef) @@ -249,11 +364,16 @@ define void @reduce_oredered_fadd_float() { %v32 = call float @llvm.vector.reduce.fadd.v32f32(float 0.0, <32 x float> undef) %V64 = call float @llvm.vector.reduce.fadd.v64f32(float 0.0, <64 x float> undef) %V128 = call float @llvm.vector.reduce.fadd.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, undef) + %NXV2 = call float @llvm.vector.reduce.fadd.nxv2f32(float 0.0, undef) + %NXV4 = call float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, undef) + %NXV8 = call float @llvm.vector.reduce.fadd.nxv8f32(float 0.0, undef) + %NXV16 = call float @llvm.vector.reduce.fadd.nxv16f32(float 0.0, undef) ret void } -define void @reduce_oredered_fadd_double() { -; FP-REDUCE-LABEL: 'reduce_oredered_fadd_double' +define void @reduce_ordered_fadd_double() { +; FP-REDUCE-LABEL: 'reduce_ordered_fadd_double' ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) @@ -262,9 +382,13 @@ define void @reduce_oredered_fadd_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; -; SIZE-LABEL: 'reduce_oredered_fadd_double' +; SIZE-LABEL: 'reduce_ordered_fadd_double' ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.000000e+00, <1 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef) @@ -273,6 +397,10 @@ define void @reduce_oredered_fadd_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fadd.v1f64(double 0.0, <1 x double> undef) @@ -283,30 +411,9 @@ define void @reduce_oredered_fadd_double() { %v32 = call double @llvm.vector.reduce.fadd.v32f64(double 0.0, <32 x double> undef) %V64 = call double @llvm.vector.reduce.fadd.v64f64(double 0.0, <64 x double> undef) %V128 = call double @llvm.vector.reduce.fadd.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call double @llvm.vector.reduce.fadd.nxv1f64(double 0.0, undef) + %NXV2 = call double @llvm.vector.reduce.fadd.nxv2f64(double 0.0, undef) + %NXV4 = call double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, undef) + %NXV8 = call double @llvm.vector.reduce.fadd.nxv8f64(double 0.0, undef) ret void } - -declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>) -declare half @llvm.vector.reduce.fadd.v2f16(half, <2 x half>) -declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>) -declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>) -declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>) -declare half @llvm.vector.reduce.fadd.v32f16(half, <32 x half>) -declare half @llvm.vector.reduce.fadd.v64f16(half, <64 x half>) -declare half @llvm.vector.reduce.fadd.v128f16(half, <128 x half>) -declare float @llvm.vector.reduce.fadd.v1f32(float, <1 x float>) -declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>) -declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>) -declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>) -declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>) -declare float @llvm.vector.reduce.fadd.v32f32(float, <32 x float>) -declare float @llvm.vector.reduce.fadd.v64f32(float, <64 x float>) -declare float @llvm.vector.reduce.fadd.v128f32(float, <128 x float>) -declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>) -declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>) -declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>) -declare double @llvm.vector.reduce.fadd.v8f64(double, <8 x double>) -declare double @llvm.vector.reduce.fadd.v16f64(double, <16 x double>) -declare double @llvm.vector.reduce.fadd.v32f64(double, <32 x double>) -declare double @llvm.vector.reduce.fadd.v64f64(double, <64 x double>) -declare double @llvm.vector.reduce.fadd.v128f64(double, <128 x double>) diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll index 162562c7b89310..211bcb1343eea4 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-fmul.ll @@ -13,6 +13,12 @@ define void @reduce_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_bfloat' @@ -24,6 +30,12 @@ define void @reduce_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -34,6 +46,12 @@ define void @reduce_fmul_bfloat() { %v32 = call fast bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call fast bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call fast bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call fast bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call fast bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call fast bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call fast bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call fast bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call fast bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) ret void } @@ -47,6 +65,12 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFH-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFH-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; FP-REDUCE-ZVFHMIN-LABEL: 'reduce_fmul_half' @@ -58,6 +82,12 @@ define void @reduce_fmul_half() { ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 541 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 573 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-ZVFHMIN-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_half' @@ -69,6 +99,12 @@ define void @reduce_fmul_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -79,6 +115,12 @@ define void @reduce_fmul_half() { %v32 = call fast half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef) %V64 = call fast half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef) %V128 = call fast half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call fast half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, undef) + %NXV2 = call fast half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, undef) + %NXV4 = call fast half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, undef) + %NXV8 = call fast half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) + %NXV16 = call fast half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) + %NXV32 = call fast half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) ret void } @@ -92,6 +134,11 @@ define void @reduce_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 451 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 483 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 547 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_float' @@ -103,6 +150,11 @@ define void @reduce_fmul_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -113,6 +165,11 @@ define void @reduce_fmul_float() { %v32 = call fast float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef) %V64 = call fast float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef) %V128 = call fast float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call fast float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, undef) + %NXV2 = call fast float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, undef) + %NXV4 = call fast float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) + %NXV8 = call fast float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) + %NXV16 = call fast float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) ret void } @@ -126,6 +183,10 @@ define void @reduce_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 393 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 457 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 585 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmul_double' @@ -137,6 +198,10 @@ define void @reduce_fmul_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call fast double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -147,6 +212,10 @@ define void @reduce_fmul_double() { %v32 = call fast double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef) %V64 = call fast double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef) %V128 = call fast double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call fast double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, undef) + %NXV2 = call fast double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) + %NXV4 = call fast double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) + %NXV8 = call fast double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) ret void } @@ -160,6 +229,12 @@ define void @reduce_ordered_fmul_bfloat() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_bfloat' @@ -171,6 +246,12 @@ define void @reduce_ordered_fmul_bfloat() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0xR0000, <32 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0xR0000, <64 x bfloat> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0xR0000, <128 x bfloat> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0xR0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0xR0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call bfloat @llvm.vector.reduce.fmul.v1bf16(bfloat 0.0, <1 x bfloat> undef) @@ -181,6 +262,12 @@ define void @reduce_ordered_fmul_bfloat() { %v32 = call bfloat @llvm.vector.reduce.fmul.v32bf16(bfloat 0.0, <32 x bfloat> undef) %V64 = call bfloat @llvm.vector.reduce.fmul.v64bf16(bfloat 0.0, <64 x bfloat> undef) %V128 = call bfloat @llvm.vector.reduce.fmul.v128bf16(bfloat 0.0, <128 x bfloat> undef) + %NXV1 = call bfloat @llvm.vector.reduce.fmul.nxv1bf16(bfloat 0.0, undef) + %NXV2 = call bfloat @llvm.vector.reduce.fmul.nxv2bf16(bfloat 0.0, undef) + %NXV4 = call bfloat @llvm.vector.reduce.fmul.nxv4bf16(bfloat 0.0, undef) + %NXV8 = call bfloat @llvm.vector.reduce.fmul.nxv8bf16(bfloat 0.0, undef) + %NXV16 = call bfloat @llvm.vector.reduce.fmul.nxv16bf16(bfloat 0.0, undef) + %NXV32 = call bfloat @llvm.vector.reduce.fmul.nxv32bf16(bfloat 0.0, undef) ret void } @@ -194,6 +281,12 @@ define void @reduce_ordered_fmul_half() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_half' @@ -205,6 +298,12 @@ define void @reduce_ordered_fmul_half() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0xH0000, <32 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0xH0000, <64 x half> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0xH0000, <128 x half> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0xH0000, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0xH0000, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call half @llvm.vector.reduce.fmul.v1f16(half 0.0, <1 x half> undef) @@ -215,6 +314,12 @@ define void @reduce_ordered_fmul_half() { %v32 = call half @llvm.vector.reduce.fmul.v32f16(half 0.0, <32 x half> undef) %V64 = call half @llvm.vector.reduce.fmul.v64f16(half 0.0, <64 x half> undef) %V128 = call half @llvm.vector.reduce.fmul.v128f16(half 0.0, <128 x half> undef) + %NXV1 = call half @llvm.vector.reduce.fmul.nxv1f16(half 0.0, undef) + %NXV2 = call half @llvm.vector.reduce.fmul.nxv2f16(half 0.0, undef) + %NXV4 = call half @llvm.vector.reduce.fmul.nxv4f16(half 0.0, undef) + %NXV8 = call half @llvm.vector.reduce.fmul.nxv8f16(half 0.0, undef) + %NXV16 = call half @llvm.vector.reduce.fmul.nxv16f16(half 0.0, undef) + %NXV32 = call half @llvm.vector.reduce.fmul.nxv32f16(half 0.0, undef) ret void } @@ -228,6 +333,11 @@ define void @reduce_ordered_fmul_float() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 254 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 508 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_float' @@ -239,6 +349,11 @@ define void @reduce_ordered_fmul_float() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.000000e+00, <32 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.000000e+00, <64 x float> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 380 for instruction: %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.000000e+00, <128 x float> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fmul.v1f32(float 0.0, <1 x float> undef) @@ -249,6 +364,11 @@ define void @reduce_ordered_fmul_float() { %v32 = call float @llvm.vector.reduce.fmul.v32f32(float 0.0, <32 x float> undef) %V64 = call float @llvm.vector.reduce.fmul.v64f32(float 0.0, <64 x float> undef) %V128 = call float @llvm.vector.reduce.fmul.v128f32(float 0.0, <128 x float> undef) + %NXV1 = call float @llvm.vector.reduce.fmul.nxv1f32(float 0.0, undef) + %NXV2 = call float @llvm.vector.reduce.fmul.nxv2f32(float 0.0, undef) + %NXV4 = call float @llvm.vector.reduce.fmul.nxv4f32(float 0.0, undef) + %NXV8 = call float @llvm.vector.reduce.fmul.nxv8f32(float 0.0, undef) + %NXV16 = call float @llvm.vector.reduce.fmul.nxv16f32(float 0.0, undef) ret void } @@ -262,6 +382,10 @@ define void @reduce_ordered_fmul_double() { ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; FP-REDUCE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; FP-REDUCE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SIZE-LABEL: 'reduce_ordered_fmul_double' @@ -273,6 +397,10 @@ define void @reduce_ordered_fmul_double() { ; SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.000000e+00, <32 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.000000e+00, <64 x double> undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.000000e+00, <128 x double> undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.000000e+00, undef) +; SIZE-NEXT: Cost Model: Invalid cost for instruction: %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.000000e+00, undef) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fmul.v1f64(double 0.0, <1 x double> undef) @@ -283,5 +411,9 @@ define void @reduce_ordered_fmul_double() { %v32 = call double @llvm.vector.reduce.fmul.v32f64(double 0.0, <32 x double> undef) %V64 = call double @llvm.vector.reduce.fmul.v64f64(double 0.0, <64 x double> undef) %V128 = call double @llvm.vector.reduce.fmul.v128f64(double 0.0, <128 x double> undef) + %NXV1 = call double @llvm.vector.reduce.fmul.nxv1f64(double 0.0, undef) + %NXV2 = call double @llvm.vector.reduce.fmul.nxv2f64(double 0.0, undef) + %NXV4 = call double @llvm.vector.reduce.fmul.nxv4f64(double 0.0, undef) + %NXV8 = call double @llvm.vector.reduce.fmul.nxv8f64(double 0.0, undef) ret void } diff --git a/llvm/test/Assembler/invalid-inttype.ll b/llvm/test/Assembler/invalid-inttype.ll index c8aa7c66b79e4d..9e3c31148af2d6 100644 --- a/llvm/test/Assembler/invalid-inttype.ll +++ b/llvm/test/Assembler/invalid-inttype.ll @@ -1,5 +1,5 @@ ; RUN: not llvm-as --disable-output %s 2>&1 | FileCheck -DFILE=%s %s ; i8388609 is the smallest integer type that can't be represented in LLVM IR -; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range! +; CHECK: [[FILE]]:[[@LINE+1]]:21: error: bitwidth for integer type out of range @i2 = common global i8388609 0, align 4 diff --git a/llvm/test/Assembler/invalid-name.ll b/llvm/test/Assembler/invalid-name.ll index 74133e60df54d5..52e2bda3adbabd 100644 Binary files a/llvm/test/Assembler/invalid-name.ll and b/llvm/test/Assembler/invalid-name.ll differ diff --git a/llvm/test/Assembler/invalid-name2.ll b/llvm/test/Assembler/invalid-name2.ll index 8a848798a54caf..78da4dc3d1b8d0 100644 Binary files a/llvm/test/Assembler/invalid-name2.ll and b/llvm/test/Assembler/invalid-name2.ll differ diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir new file mode 100644 index 00000000000000..59e4de9440416f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/icmp-flags.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=none -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: icmp_samesign +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: icmp_samesign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = samesign G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = samesign G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +... +--- +name: icmp_differentsign +body: | + bb.0: + liveins: $w0, $w1 + ; CHECK-LABEL: name: icmp_differentsign + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: %cmp:_(s1) = G_ICMP intpred(eq), %y(s32), %y + ; CHECK-NEXT: %zext:_(s32) = G_ZEXT %cmp(s1) + ; CHECK-NEXT: $w0 = COPY %zext(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %x:_(s32) = COPY $w0 + %y:_(s32) = COPY $w1 + %cmp:_(s1) = G_ICMP intpred(eq), %y:_(s32), %y:_ + %zext:_(s32) = G_ZEXT %cmp:_(s1) + $w0 = COPY %zext + RET_ReallyLR implicit $w0 +--- diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll new file mode 100644 index 00000000000000..0173f92c982203 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslater-samesign.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator < %s | FileCheck %s + + +define <2 x i1> @call_icmp_samesign_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_samesign_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: %2:_(<2 x s1>) = samesign G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT %2(<2 x s1>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp samesign ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define <2 x i1> @call_icmp_vector(<2 x i32> %a, <2 x i32> %b) { + ; CHECK-LABEL: name: call_icmp_vector + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s1>) = G_ICMP intpred(ult), [[COPY]](<2 x s32>), [[COPY1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s32>) = G_ANYEXT [[ICMP]](<2 x s1>) + ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $d0 +entry: + %result = icmp ult <2 x i32> %a, %b + ret <2 x i1> %result +} + +define i1 @call_icmp(i32 %a) { + ; CHECK-LABEL: name: call_icmp + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp ult i32 %a, 3 + ret i1 %result +} + +define i1 @call_icmp_samesign(i32 %a) { + ; CHECK-LABEL: name: call_icmp_samesign + ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %2:_(s1) = samesign G_ICMP intpred(ult), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s8) = G_ZEXT %2(s1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ZEXT]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 +entry: + %result = icmp samesign ult i32 %a, 3 + ret i1 %result +} diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll index 1dfd977186b0e7..7af7c235f9ac16 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -3560,4 +3560,16 @@ entry: ret <4 x i16> %vrshrn_n1 } +define <8 x i16> @signbits_vashr(<8 x i16> %a) { +; CHECK-LABEL: signbits_vashr: +; CHECK: // %bb.0: +; CHECK-NEXT: sshr.8h v0, v0, #8 +; CHECK-NEXT: sshr.8h v0, v0, #9 +; CHECK-NEXT: ret + %b = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> ) + %c = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %b, <8 x i16> ) + %d = ashr <8 x i16> %c, + ret <8 x i16> %d +} + declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>) diff --git a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir index 0963ecbb123115..a2532a854923f5 100644 --- a/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir +++ b/llvm/test/CodeGen/AArch64/jump-table-duplicate.mir @@ -1,4 +1,5 @@ # RUN: llc -run-pass=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s +# RUN: llc -passes=tailduplication -tail-dup-size=4 %s -o - | FileCheck %s # JumpTableDest32 uses an `adr` to a temporary label (itself). If duplicated we # cannot guarantee reachability for any uses after the first. diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll index c1810c678ea522..6e2ecfca9e963e 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s -; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -61,10 +61,10 @@ define <8 x i8> @concat_v8i8(<4 x i8> %op1, <4 x i8> %op2) { define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: concat_v16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -172,10 +172,10 @@ define <4 x i16> @concat_v4i16(<2 x i16> %op1, <2 x i16> %op2) { define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: concat_v8i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -270,10 +270,10 @@ define <2 x i32> @concat_v2i32(<1 x i32> %op1, <1 x i32> %op2) { define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: concat_v4i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -340,10 +340,10 @@ define void @concat_v16i32(ptr %a, ptr %b, ptr %c) { define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: concat_v2i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.d, vl1 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -406,17 +406,33 @@ define void @concat_v8i64(ptr %a, ptr %b, ptr %c) { ; define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { -; CHECK-LABEL: concat_v4f16: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: mov z3.h, z0.h[1] -; CHECK-NEXT: zip1 z1.h, z1.h, z2.h -; CHECK-NEXT: zip1 z0.h, z0.h, z3.h -; CHECK-NEXT: zip1 z0.s, z0.s, z1.s -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: ret +; SVE2-LABEL: concat_v4f16: +; SVE2: // %bb.0: +; SVE2-NEXT: cnth x8 +; SVE2-NEXT: adrp x9, .LCPI15_0 +; SVE2-NEXT: adrp x10, .LCPI15_1 +; SVE2-NEXT: mov z2.h, w8 +; SVE2-NEXT: ldr q3, [x9, :lo12:.LCPI15_0] +; SVE2-NEXT: ldr q4, [x10, :lo12:.LCPI15_1] +; SVE2-NEXT: ptrue p0.h, vl8 +; SVE2-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; SVE2-NEXT: mad z2.h, p0/m, z3.h, z4.h +; SVE2-NEXT: tbl z0.h, { z0.h, z1.h }, z2.h +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: ret +; +; SME-LABEL: concat_v4f16: +; SME: // %bb.0: +; SME-NEXT: // kill: def $d1 killed $d1 def $z1 +; SME-NEXT: // kill: def $d0 killed $d0 def $z0 +; SME-NEXT: mov z2.h, z1.h[1] +; SME-NEXT: mov z3.h, z0.h[1] +; SME-NEXT: zip1 z1.h, z1.h, z2.h +; SME-NEXT: zip1 z0.h, z0.h, z3.h +; SME-NEXT: zip1 z0.s, z0.s, z1.s +; SME-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SME-NEXT: ret ; ; NONEON-NOSVE-LABEL: concat_v4f16: ; NONEON-NOSVE: // %bb.0: @@ -436,10 +452,10 @@ define <4 x half> @concat_v4f16(<2 x half> %op1, <2 x half> %op2) { define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-LABEL: concat_v8f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -534,10 +550,10 @@ define <2 x float> @concat_v2f32(<1 x float> %op1, <1 x float> %op2) { define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-LABEL: concat_v4f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -604,10 +620,10 @@ define void @concat_v16f32(ptr %a, ptr %b, ptr %c) { define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) { ; CHECK-LABEL: concat_v2f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: ptrue p0.d, vl1 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 -; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll index f1771a753826cc..2282e74af5d006 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE -; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 -; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=SVE2 +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=SVE2 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" @@ -842,16 +842,16 @@ define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) { ; ; SVE2-LABEL: test_copysign_v4f32_v4f64: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x1] +; SVE2-NEXT: ldp q1, q0, [x1] ; SVE2-NEXT: ptrue p0.d -; SVE2-NEXT: ldr q2, [x0] -; SVE2-NEXT: fcvt z1.s, p0/m, z1.d ; SVE2-NEXT: fcvt z0.s, p0/m, z0.d +; SVE2-NEXT: fcvt z1.s, p0/m, z1.d ; SVE2-NEXT: ptrue p0.s, vl2 -; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s -; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s -; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s +; SVE2-NEXT: uzp1 z3.s, z0.s, z0.s +; SVE2-NEXT: uzp1 z2.s, z1.s, z1.s ; SVE2-NEXT: mov z1.s, #0x7fffffff +; SVE2-NEXT: splice z0.s, p0, { z2.s, z3.s } +; SVE2-NEXT: ldr q2, [x0] ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret @@ -1237,16 +1237,16 @@ define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) { ; ; SVE2-LABEL: test_copysign_v8f16_v8f32: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x1] +; SVE2-NEXT: ldp q1, q0, [x1] ; SVE2-NEXT: ptrue p0.s -; SVE2-NEXT: ldr q2, [x0] -; SVE2-NEXT: fcvt z1.h, p0/m, z1.s ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s +; SVE2-NEXT: fcvt z1.h, p0/m, z1.s ; SVE2-NEXT: ptrue p0.h, vl4 -; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h -; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h -; SVE2-NEXT: splice z0.h, p0, z0.h, z1.h +; SVE2-NEXT: uzp1 z3.h, z0.h, z0.h +; SVE2-NEXT: uzp1 z2.h, z1.h, z1.h ; SVE2-NEXT: mov z1.h, #32767 // =0x7fff +; SVE2-NEXT: splice z0.h, p0, { z2.h, z3.h } +; SVE2-NEXT: ldr q2, [x0] ; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d ; SVE2-NEXT: str q2, [x0] ; SVE2-NEXT: ret @@ -1349,5 +1349,3 @@ declare <8 x float> @llvm.copysign.v8f32(<8 x float> %a, <8 x float> %b) #0 declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0 declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0 -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll index 516772b8ca6640..1fdcd4f8268708 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE -; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2 -; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2 +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE target triple = "aarch64-unknown-linux-gnu" @@ -26,19 +25,6 @@ define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v4i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: shl v0.4h, v0.4h, #8 -; NEON-NOSVE-NEXT: shl v1.4h, v1.4h, #8 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: sshr v0.4h, v0.4h, #8 -; NEON-NOSVE-NEXT: sshr v1.4h, v1.4h, #8 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v4i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -85,27 +71,12 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v8i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 -; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v8i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -177,45 +148,21 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v16i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: sshll2 v2.8h, v1.16b, #0 -; NEON-NOSVE-NEXT: sshll2 v3.8h, v0.16b, #0 -; NEON-NOSVE-NEXT: sshll v1.8h, v1.8b, #0 -; NEON-NOSVE-NEXT: sshll v0.8h, v0.8b, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: sshll2 v4.4s, v2.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v5.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 -; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; NEON-NOSVE-NEXT: sshll2 v5.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: sshll2 v3.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sdivr z3.s, p0/m, z3.s, z5.s -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h -; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v16i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -319,7 +266,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sunpklo z4.h, z2.b ; CHECK-NEXT: sunpklo z2.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: sunpklo z5.s, z4.h ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 ; CHECK-NEXT: sunpklo z3.s, z3.h @@ -328,7 +274,6 @@ define void @sdiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q5, [x0] ; CHECK-NEXT: sunpklo z16.h, z5.b ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: sunpklo z5.h, z5.b ; CHECK-NEXT: sunpklo z18.s, z16.h ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8 @@ -337,81 +282,36 @@ define void @sdiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sunpklo z18.s, z5.h ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 ; CHECK-NEXT: sunpklo z5.s, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s ; CHECK-NEXT: sunpklo z16.s, z6.h ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 ; CHECK-NEXT: sunpklo z6.s, z6.h +; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h ; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s +; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h ; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h +; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h ; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z4.h, z17.h, z17.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z6.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: splice z1.b, p0, z1.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b } +; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b } +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v32i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q6, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldr q2, [x0, #16] -; NEON-NOSVE-NEXT: sshll2 v1.8h, v3.16b, #0 -; NEON-NOSVE-NEXT: sshll2 v4.8h, v2.16b, #0 -; NEON-NOSVE-NEXT: sshll v3.8h, v3.8b, #0 -; NEON-NOSVE-NEXT: sshll v2.8h, v2.8b, #0 -; NEON-NOSVE-NEXT: sshll2 v7.8h, v6.16b, #0 -; NEON-NOSVE-NEXT: sshll v6.8h, v6.8b, #0 -; NEON-NOSVE-NEXT: sshll2 v0.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0 -; NEON-NOSVE-NEXT: sshll2 v17.4s, v7.8h, #0 -; NEON-NOSVE-NEXT: sshll v7.4s, v7.4h, #0 -; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z5.s -; NEON-NOSVE-NEXT: sshll2 v5.4s, v2.8h, #0 -; NEON-NOSVE-NEXT: sshll v2.4s, v2.4h, #0 -; NEON-NOSVE-NEXT: sdivr z1.s, p0/m, z1.s, z4.s -; NEON-NOSVE-NEXT: sshll2 v4.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; NEON-NOSVE-NEXT: ldr q5, [x0] -; NEON-NOSVE-NEXT: sshll2 v16.8h, v5.16b, #0 -; NEON-NOSVE-NEXT: sshll v5.8h, v5.8b, #0 -; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; NEON-NOSVE-NEXT: sshll2 v18.4s, v16.8h, #0 -; NEON-NOSVE-NEXT: sshll v16.4s, v16.4h, #0 -; NEON-NOSVE-NEXT: sdivr z17.s, p0/m, z17.s, z18.s -; NEON-NOSVE-NEXT: sshll2 v18.4s, v5.8h, #0 -; NEON-NOSVE-NEXT: sshll v5.4s, v5.4h, #0 -; NEON-NOSVE-NEXT: sdivr z7.s, p0/m, z7.s, z16.s -; NEON-NOSVE-NEXT: sshll2 v16.4s, v6.8h, #0 -; NEON-NOSVE-NEXT: sshll v6.4s, v6.4h, #0 -; NEON-NOSVE-NEXT: sdivr z16.s, p0/m, z16.s, z18.s -; NEON-NOSVE-NEXT: sdiv z5.s, p0/m, z5.s, z6.s -; NEON-NOSVE-NEXT: sdiv z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h -; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h -; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h -; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b -; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b -; NEON-NOSVE-NEXT: stp q2, q0, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v32i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -571,17 +471,6 @@ define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v2i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: shl v1.2s, v1.2s, #16 -; NEON-NOSVE-NEXT: shl v0.2s, v0.2s, #16 -; NEON-NOSVE-NEXT: ptrue p0.s, vl2 -; NEON-NOSVE-NEXT: sshr v1.2s, v1.2s, #16 -; NEON-NOSVE-NEXT: sshr v0.2s, v0.2s, #16 -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v2i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -614,15 +503,6 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v4i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v4i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -664,26 +544,14 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v8i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v8i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -748,41 +616,18 @@ define void @sdiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z3.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h } +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v16i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q4, q1, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldr q0, [x0, #16] -; NEON-NOSVE-NEXT: sshll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: sshll2 v5.4s, v4.8h, #0 -; NEON-NOSVE-NEXT: sshll v4.4s, v4.4h, #0 -; NEON-NOSVE-NEXT: sshll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: sshll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: ldr q3, [x0] -; NEON-NOSVE-NEXT: sshll2 v6.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: sshll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; NEON-NOSVE-NEXT: sdiv z3.s, p0/m, z3.s, z4.s -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: stp q1, q0, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v16i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -876,15 +721,6 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v2i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.s, vl2 -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1 -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v2i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -913,15 +749,6 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v4i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1 -; NEON-NOSVE-NEXT: sdiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v4i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -957,17 +784,6 @@ define void @sdiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v8i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q0, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldp q1, q2, [x0] -; NEON-NOSVE-NEXT: sdivr z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: movprfx z1, z2 -; NEON-NOSVE-NEXT: sdiv z1.s, p0/m, z1.s, z3.s -; NEON-NOSVE-NEXT: stp q0, q1, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v8i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -1021,15 +837,6 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v1i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.d, vl1 -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1 -; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v1i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #16 @@ -1055,15 +862,6 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v2i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.d, vl2 -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1 -; NEON-NOSVE-NEXT: sdiv z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v2i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -1093,17 +891,6 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: sdiv_v4i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q0, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.d, vl2 -; NEON-NOSVE-NEXT: ldp q1, q2, [x0] -; NEON-NOSVE-NEXT: sdivr z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: movprfx z1, z2 -; NEON-NOSVE-NEXT: sdiv z1.d, p0/m, z1.d, z3.d -; NEON-NOSVE-NEXT: stp q0, q1, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: sdiv_v4i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -1135,9 +922,7 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ret void } -; ; UDIV -; define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: udiv_v4i8: @@ -1154,17 +939,6 @@ define <4 x i8> @udiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v4i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: bic v0.4h, #255, lsl #8 -; NEON-NOSVE-NEXT: bic v1.4h, #255, lsl #8 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v4i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -1211,27 +985,12 @@ define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v8i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 -; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: xtn v0.8b, v0.8h -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v8i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -1303,45 +1062,21 @@ define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v16i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ushll2 v2.8h, v1.16b, #0 -; NEON-NOSVE-NEXT: ushll2 v3.8h, v0.16b, #0 -; NEON-NOSVE-NEXT: ushll v1.8h, v1.8b, #0 -; NEON-NOSVE-NEXT: ushll v0.8h, v0.8b, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ushll2 v4.4s, v2.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v5.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 -; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; NEON-NOSVE-NEXT: ushll2 v5.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: ushll2 v3.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: udivr z3.s, p0/m, z3.s, z5.s -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v3.8h -; NEON-NOSVE-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v16i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -1445,7 +1180,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: uunpklo z4.h, z2.b ; CHECK-NEXT: uunpklo z2.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uunpklo z5.s, z4.h ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 ; CHECK-NEXT: uunpklo z3.s, z3.h @@ -1454,7 +1188,6 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ldr q5, [x0] ; CHECK-NEXT: uunpklo z16.h, z5.b ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uunpklo z5.h, z5.b ; CHECK-NEXT: uunpklo z18.s, z16.h ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8 @@ -1463,81 +1196,36 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: uunpklo z18.s, z5.h ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 ; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s ; CHECK-NEXT: uunpklo z16.s, z6.h ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 ; CHECK-NEXT: uunpklo z6.s, z6.h +; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h ; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s +; CHECK-NEXT: uzp1 z18.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z19.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z21.h, z7.h, z7.h ; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h +; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h ; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z4.h, z17.h, z17.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z6.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z20.h, z21.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z1.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: splice z1.b, p0, z1.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z3.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b } +; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b } +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v32i8: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q6, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldr q2, [x0, #16] -; NEON-NOSVE-NEXT: ushll2 v1.8h, v3.16b, #0 -; NEON-NOSVE-NEXT: ushll2 v4.8h, v2.16b, #0 -; NEON-NOSVE-NEXT: ushll v3.8h, v3.8b, #0 -; NEON-NOSVE-NEXT: ushll v2.8h, v2.8b, #0 -; NEON-NOSVE-NEXT: ushll2 v7.8h, v6.16b, #0 -; NEON-NOSVE-NEXT: ushll v6.8h, v6.8b, #0 -; NEON-NOSVE-NEXT: ushll2 v0.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0 -; NEON-NOSVE-NEXT: ushll2 v17.4s, v7.8h, #0 -; NEON-NOSVE-NEXT: ushll v7.4s, v7.4h, #0 -; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z5.s -; NEON-NOSVE-NEXT: ushll2 v5.4s, v2.8h, #0 -; NEON-NOSVE-NEXT: ushll v2.4s, v2.4h, #0 -; NEON-NOSVE-NEXT: udivr z1.s, p0/m, z1.s, z4.s -; NEON-NOSVE-NEXT: ushll2 v4.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; NEON-NOSVE-NEXT: ldr q5, [x0] -; NEON-NOSVE-NEXT: ushll2 v16.8h, v5.16b, #0 -; NEON-NOSVE-NEXT: ushll v5.8h, v5.8b, #0 -; NEON-NOSVE-NEXT: uzp1 v0.8h, v1.8h, v0.8h -; NEON-NOSVE-NEXT: ushll2 v18.4s, v16.8h, #0 -; NEON-NOSVE-NEXT: ushll v16.4s, v16.4h, #0 -; NEON-NOSVE-NEXT: udivr z17.s, p0/m, z17.s, z18.s -; NEON-NOSVE-NEXT: ushll2 v18.4s, v5.8h, #0 -; NEON-NOSVE-NEXT: ushll v5.4s, v5.4h, #0 -; NEON-NOSVE-NEXT: udivr z7.s, p0/m, z7.s, z16.s -; NEON-NOSVE-NEXT: ushll2 v16.4s, v6.8h, #0 -; NEON-NOSVE-NEXT: ushll v6.4s, v6.4h, #0 -; NEON-NOSVE-NEXT: udivr z16.s, p0/m, z16.s, z18.s -; NEON-NOSVE-NEXT: udiv z5.s, p0/m, z5.s, z6.s -; NEON-NOSVE-NEXT: udiv z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: uzp1 v3.8h, v7.8h, v17.8h -; NEON-NOSVE-NEXT: uzp1 v5.8h, v5.8h, v16.8h -; NEON-NOSVE-NEXT: uzp1 v1.8h, v2.8h, v4.8h -; NEON-NOSVE-NEXT: uzp1 v2.16b, v5.16b, v3.16b -; NEON-NOSVE-NEXT: uzp1 v0.16b, v1.16b, v0.16b -; NEON-NOSVE-NEXT: stp q2, q0, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v32i8: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -1697,16 +1385,6 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v2i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: movi d2, #0x00ffff0000ffff -; NEON-NOSVE-NEXT: ptrue p0.s, vl2 -; NEON-NOSVE-NEXT: and v1.8b, v1.8b, v2.8b -; NEON-NOSVE-NEXT: and v0.8b, v0.8b, v2.8b -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v2i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -1739,15 +1417,6 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v4i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: xtn v0.4h, v0.4s -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v4i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -1789,26 +1458,14 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v8i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v8i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -1873,41 +1530,18 @@ define void @udiv_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: uzp1 z4.h, z5.h, z5.h ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z3.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z1.h, p0, { z1.h, z2.h } +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v16i16: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q4, q1, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldr q0, [x0, #16] -; NEON-NOSVE-NEXT: ushll2 v2.4s, v1.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v3.4s, v0.8h, #0 -; NEON-NOSVE-NEXT: ushll2 v5.4s, v4.8h, #0 -; NEON-NOSVE-NEXT: ushll v4.4s, v4.4h, #0 -; NEON-NOSVE-NEXT: ushll v1.4s, v1.4h, #0 -; NEON-NOSVE-NEXT: ushll v0.4s, v0.4h, #0 -; NEON-NOSVE-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; NEON-NOSVE-NEXT: ldr q3, [x0] -; NEON-NOSVE-NEXT: ushll2 v6.4s, v3.8h, #0 -; NEON-NOSVE-NEXT: ushll v3.4s, v3.4h, #0 -; NEON-NOSVE-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; NEON-NOSVE-NEXT: udiv z3.s, p0/m, z3.s, z4.s -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: uzp1 v1.8h, v3.8h, v5.8h -; NEON-NOSVE-NEXT: uzp1 v0.8h, v0.8h, v2.8h -; NEON-NOSVE-NEXT: stp q1, q0, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v16i16: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -2001,15 +1635,6 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v2i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.s, vl2 -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1 -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v2i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #32 @@ -2038,15 +1663,6 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v4i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1 -; NEON-NOSVE-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v4i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -2082,17 +1698,6 @@ define void @udiv_v8i32(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v8i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q0, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.s, vl4 -; NEON-NOSVE-NEXT: ldp q1, q2, [x0] -; NEON-NOSVE-NEXT: udivr z0.s, p0/m, z0.s, z1.s -; NEON-NOSVE-NEXT: movprfx z1, z2 -; NEON-NOSVE-NEXT: udiv z1.s, p0/m, z1.s, z3.s -; NEON-NOSVE-NEXT: stp q0, q1, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v8i32: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -2146,15 +1751,6 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v1i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.d, vl1 -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $d1 killed $d1 def $z1 -; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: // kill: def $d0 killed $d0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v1i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #16 @@ -2180,15 +1776,6 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v2i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ptrue p0.d, vl2 -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 def $z0 -; NEON-NOSVE-NEXT: // kill: def $q1 killed $q1 def $z1 -; NEON-NOSVE-NEXT: udiv z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: // kill: def $q0 killed $q0 killed $z0 -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v2i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-48]! @@ -2218,17 +1805,6 @@ define void @udiv_v4i64(ptr %a, ptr %b) { ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret ; -; NEON-NOSVE-LABEL: udiv_v4i64: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: ldp q0, q3, [x1] -; NEON-NOSVE-NEXT: ptrue p0.d, vl2 -; NEON-NOSVE-NEXT: ldp q1, q2, [x0] -; NEON-NOSVE-NEXT: udivr z0.d, p0/m, z0.d, z1.d -; NEON-NOSVE-NEXT: movprfx z1, z2 -; NEON-NOSVE-NEXT: udiv z1.d, p0/m, z1.d, z3.d -; NEON-NOSVE-NEXT: stp q0, q1, [x0] -; NEON-NOSVE-NEXT: ret -; ; NONEON-NOSVE-LABEL: udiv_v4i64: ; NONEON-NOSVE: // %bb.0: ; NONEON-NOSVE-NEXT: sub sp, sp, #96 @@ -2261,64 +1837,22 @@ define void @udiv_v4i64(ptr %a, ptr %b) { } define void @udiv_constantsplat_v8i32(ptr %a) { -; SVE-LABEL: udiv_constantsplat_v8i32: -; SVE: // %bb.0: -; SVE-NEXT: mov w8, #8969 // =0x2309 -; SVE-NEXT: ldp q1, q2, [x0] -; SVE-NEXT: movk w8, #22765, lsl #16 -; SVE-NEXT: ptrue p0.s, vl4 -; SVE-NEXT: mov z0.s, w8 -; SVE-NEXT: movprfx z3, z1 -; SVE-NEXT: umulh z3.s, p0/m, z3.s, z0.s -; SVE-NEXT: umulh z0.s, p0/m, z0.s, z2.s -; SVE-NEXT: sub z1.s, z1.s, z3.s -; SVE-NEXT: sub z2.s, z2.s, z0.s -; SVE-NEXT: lsr z1.s, z1.s, #1 -; SVE-NEXT: lsr z2.s, z2.s, #1 -; SVE-NEXT: add z1.s, z1.s, z3.s -; SVE-NEXT: add z0.s, z2.s, z0.s -; SVE-NEXT: lsr z1.s, z1.s, #6 -; SVE-NEXT: lsr z0.s, z0.s, #6 -; SVE-NEXT: stp q1, q0, [x0] -; SVE-NEXT: ret -; -; SVE2-LABEL: udiv_constantsplat_v8i32: -; SVE2: // %bb.0: -; SVE2-NEXT: mov w8, #8969 // =0x2309 -; SVE2-NEXT: ldp q1, q2, [x0] -; SVE2-NEXT: movk w8, #22765, lsl #16 -; SVE2-NEXT: mov z0.s, w8 -; SVE2-NEXT: umulh z3.s, z1.s, z0.s -; SVE2-NEXT: umulh z0.s, z2.s, z0.s -; SVE2-NEXT: sub z1.s, z1.s, z3.s -; SVE2-NEXT: sub z2.s, z2.s, z0.s -; SVE2-NEXT: usra z3.s, z1.s, #1 -; SVE2-NEXT: usra z0.s, z2.s, #1 -; SVE2-NEXT: lsr z1.s, z3.s, #6 -; SVE2-NEXT: lsr z0.s, z0.s, #6 -; SVE2-NEXT: stp q1, q0, [x0] -; SVE2-NEXT: ret -; -; NEON-NOSVE-LABEL: udiv_constantsplat_v8i32: -; NEON-NOSVE: // %bb.0: -; NEON-NOSVE-NEXT: mov w8, #8969 // =0x2309 -; NEON-NOSVE-NEXT: ldp q1, q2, [x0] -; NEON-NOSVE-NEXT: movk w8, #22765, lsl #16 -; NEON-NOSVE-NEXT: dup v0.4s, w8 -; NEON-NOSVE-NEXT: umull2 v3.2d, v1.4s, v0.4s -; NEON-NOSVE-NEXT: umull v4.2d, v1.2s, v0.2s -; NEON-NOSVE-NEXT: umull2 v5.2d, v2.4s, v0.4s -; NEON-NOSVE-NEXT: umull v0.2d, v2.2s, v0.2s -; NEON-NOSVE-NEXT: uzp2 v3.4s, v4.4s, v3.4s -; NEON-NOSVE-NEXT: uzp2 v0.4s, v0.4s, v5.4s -; NEON-NOSVE-NEXT: sub v1.4s, v1.4s, v3.4s -; NEON-NOSVE-NEXT: sub v2.4s, v2.4s, v0.4s -; NEON-NOSVE-NEXT: usra v3.4s, v1.4s, #1 -; NEON-NOSVE-NEXT: usra v0.4s, v2.4s, #1 -; NEON-NOSVE-NEXT: ushr v1.4s, v3.4s, #6 -; NEON-NOSVE-NEXT: ushr v0.4s, v0.4s, #6 -; NEON-NOSVE-NEXT: stp q1, q0, [x0] -; NEON-NOSVE-NEXT: ret +; CHECK-LABEL: udiv_constantsplat_v8i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 // =0x2309 +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: umulh z3.s, z1.s, z0.s +; CHECK-NEXT: umulh z0.s, z2.s, z0.s +; CHECK-NEXT: sub z1.s, z1.s, z3.s +; CHECK-NEXT: sub z2.s, z2.s, z0.s +; CHECK-NEXT: usra z3.s, z1.s, #1 +; CHECK-NEXT: usra z0.s, z2.s, #1 +; CHECK-NEXT: lsr z1.s, z3.s, #6 +; CHECK-NEXT: lsr z0.s, z0.s, #6 +; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: udiv_constantsplat_v8i32: ; NONEON-NOSVE: // %bb.0: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll index b4641172f8b06d..9497ec88e57b4d 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -76,10 +76,10 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -160,23 +160,23 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: sunpklo z5.s, z5.h ; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8 -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h ; CHECK-NEXT: sunpklo z3.h, z3.b ; CHECK-NEXT: sunpklo z6.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h -; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b +; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b +; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b } ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -300,14 +300,12 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: sunpklo z4.s, z16.h ; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: sunpklo z7.s, z7.h ; CHECK-NEXT: movprfx z6, z4 ; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z3.s ; CHECK-NEXT: ldr q3, [x0] ; CHECK-NEXT: ldr q4, [x1] ; CHECK-NEXT: sunpklo z16.s, z16.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: sunpklo z17.h, z4.b ; CHECK-NEXT: sunpklo z18.h, z3.b ; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s @@ -317,11 +315,9 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 ; CHECK-NEXT: sunpklo z17.s, z17.h ; CHECK-NEXT: sunpklo z18.s, z18.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h ; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s ; CHECK-NEXT: mov z20.d, z3.d ; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8 -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h ; CHECK-NEXT: sunpklo z20.h, z20.b ; CHECK-NEXT: sunpklo z22.s, z20.h ; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8 @@ -329,32 +325,36 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z18.d, z4.d ; CHECK-NEXT: sunpklo z20.s, z20.h ; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8 -; CHECK-NEXT: uzp1 z16.h, z19.h, z19.h ; CHECK-NEXT: sunpklo z18.h, z18.b ; CHECK-NEXT: sunpklo z21.s, z18.h ; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 ; CHECK-NEXT: sunpklo z18.s, z18.h ; CHECK-NEXT: sdivr z21.s, p0/m, z21.s, z22.s -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h +; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h ; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s +; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h -; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h -; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b -; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h -; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h +; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h +; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h } +; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h } +; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h +; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h } +; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h +; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: splice z2.b, p0, z2.b, z6.b -; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b -; CHECK-NEXT: splice z5.b, p0, z5.b, z7.b +; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b } +; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b +; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b } ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b -; CHECK-NEXT: mls z3.b, p0/m, z5.b, z4.b -; CHECK-NEXT: stp q3, q0, [x0] +; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b +; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: srem_v32i8: @@ -600,9 +600,9 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -680,23 +680,23 @@ define void @srem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s ; CHECK-NEXT: mov z6.d, z4.d ; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: sunpklo z6.s, z6.h ; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s ; CHECK-NEXT: mov z7.d, z1.d ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8 -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: sunpklo z7.s, z7.h ; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s +; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z6.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z7.h +; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h +; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h } ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: mls z3.h, p0/m, z5.h, z4.h -; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h -; CHECK-NEXT: stp q3, q0, [x0] +; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h +; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: srem_v16i16: @@ -1126,10 +1126,10 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -1210,23 +1210,23 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: uunpklo z5.s, z5.h ; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8 -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h ; CHECK-NEXT: uunpklo z3.h, z3.b ; CHECK-NEXT: uunpklo z6.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h -; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z4.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z7.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, { z6.h, z7.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b +; CHECK-NEXT: uzp1 z5.b, z3.b, z3.b +; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b } ; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1350,14 +1350,12 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: uunpklo z4.s, z16.h ; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 ; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uunpklo z7.s, z7.h ; CHECK-NEXT: movprfx z6, z4 ; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z3.s ; CHECK-NEXT: ldr q3, [x0] ; CHECK-NEXT: ldr q4, [x1] ; CHECK-NEXT: uunpklo z16.s, z16.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: uunpklo z17.h, z4.b ; CHECK-NEXT: uunpklo z18.h, z3.b ; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s @@ -1367,11 +1365,9 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 ; CHECK-NEXT: uunpklo z17.s, z17.h ; CHECK-NEXT: uunpklo z18.s, z18.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h ; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s ; CHECK-NEXT: mov z20.d, z3.d ; CHECK-NEXT: ext z20.b, z20.b, z3.b, #8 -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h ; CHECK-NEXT: uunpklo z20.h, z20.b ; CHECK-NEXT: uunpklo z22.s, z20.h ; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8 @@ -1379,32 +1375,36 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-NEXT: mov z18.d, z4.d ; CHECK-NEXT: uunpklo z20.s, z20.h ; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8 -; CHECK-NEXT: uzp1 z16.h, z19.h, z19.h ; CHECK-NEXT: uunpklo z18.h, z18.b ; CHECK-NEXT: uunpklo z21.s, z18.h ; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 ; CHECK-NEXT: uunpklo z18.s, z18.h ; CHECK-NEXT: udivr z21.s, p0/m, z21.s, z22.s -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h +; CHECK-NEXT: uzp1 z22.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h ; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s +; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z5.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h -; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h -; CHECK-NEXT: uzp1 z5.b, z16.b, z16.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b -; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h -; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h +; CHECK-NEXT: uzp1 z20.h, z17.h, z17.h +; CHECK-NEXT: splice z7.h, p0, { z22.h, z23.h } +; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h } +; CHECK-NEXT: uzp1 z16.h, z21.h, z21.h +; CHECK-NEXT: splice z2.h, p0, { z19.h, z20.h } +; CHECK-NEXT: uzp1 z6.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z7.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z17.h, z18.h, z18.h +; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z17.b, z2.b, z2.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: splice z2.b, p0, z2.b, z6.b -; CHECK-NEXT: uzp1 z7.b, z19.b, z19.b -; CHECK-NEXT: splice z5.b, p0, z5.b, z7.b +; CHECK-NEXT: splice z5.b, p0, { z6.b, z7.b } +; CHECK-NEXT: uzp1 z18.b, z16.b, z16.b +; CHECK-NEXT: splice z2.b, p0, { z17.b, z18.b } ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b -; CHECK-NEXT: mls z3.b, p0/m, z5.b, z4.b -; CHECK-NEXT: stp q3, q0, [x0] +; CHECK-NEXT: mls z0.b, p0/m, z5.b, z1.b +; CHECK-NEXT: msb z2.b, p0/m, z4.b, z3.b +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: urem_v32i8: @@ -1650,9 +1650,9 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: uzp1 z4.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z5.h, z3.h, z3.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } ; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1730,23 +1730,23 @@ define void @urem_v16i16(ptr %a, ptr %b) { ; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s ; CHECK-NEXT: mov z6.d, z4.d ; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8 -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uunpklo z6.s, z6.h ; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s ; CHECK-NEXT: mov z7.d, z1.d ; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8 -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: uunpklo z7.s, z7.h ; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s +; CHECK-NEXT: uzp1 z16.h, z5.h, z5.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z6.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z7.h +; CHECK-NEXT: uzp1 z17.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z5.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z6.h, z7.h, z7.h +; CHECK-NEXT: splice z5.h, p0, { z5.h, z6.h } ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: mls z3.h, p0/m, z5.h, z4.h -; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h -; CHECK-NEXT: stp q3, q0, [x0] +; CHECK-NEXT: msb z2.h, p0/m, z4.h, z3.h +; CHECK-NEXT: mls z0.h, p0/m, z5.h, z1.h +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: urem_v16i16: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll index 5235423c00d9a1..e07036f2a1acfc 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -11,28 +11,28 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: fcmne p0.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z3.s, #0.0 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h +; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b -; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b +; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b } ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: umaxv b0, p0, z1.b +; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -120,49 +120,49 @@ define i1 @ptest_v16i1(ptr %a, ptr %b) { define i1 @ptest_or_v16i1(ptr %a, ptr %b) { ; CHECK-LABEL: ptest_or_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q4, q5, [x1, #32] -; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: ldp q2, q3, [x1, #32] +; CHECK-NEXT: ldp q4, q5, [x0] +; CHECK-NEXT: fcmne p1.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: ldp q1, q6, [x1] ; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: fcmne p4.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: fcmne p5.s, p0/z, z5.s, #0.0 -; CHECK-NEXT: fcmne p6.s, p0/z, z4.s, #0.0 -; CHECK-NEXT: fcmne p7.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p5.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: fcmne p4.s, p0/z, z5.s, #0.0 +; CHECK-NEXT: fcmne p7.s, p0/z, z4.s, #0.0 +; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z17.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z19.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h +; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z0.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h +; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h } +; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b -; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b -; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } +; CHECK-NEXT: splice z1.b, p0, { z4.b, z5.b } ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: orr z0.d, z1.d, z3.d +; CHECK-NEXT: orr z0.d, z0.d, z1.d ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 @@ -329,49 +329,49 @@ declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>) define i1 @ptest_and_v16i1(ptr %a, ptr %b) { ; CHECK-LABEL: ptest_and_v16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q4, q5, [x1, #32] -; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: ldp q0, q1, [x1] +; CHECK-NEXT: ldp q2, q3, [x1, #32] +; CHECK-NEXT: ldp q4, q5, [x0] +; CHECK-NEXT: fcmne p1.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: ldp q1, q6, [x1] ; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: fcmne p4.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: fcmne p5.s, p0/z, z5.s, #0.0 -; CHECK-NEXT: fcmne p6.s, p0/z, z4.s, #0.0 -; CHECK-NEXT: fcmne p7.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmne p5.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: fcmne p4.s, p0/z, z5.s, #0.0 +; CHECK-NEXT: fcmne p7.s, p0/z, z4.s, #0.0 +; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0 +; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z17.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z19.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h +; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z0.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h +; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h } +; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b -; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b -; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z2.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } +; CHECK-NEXT: splice z1.b, p0, { z4.b, z5.b } ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: and z0.d, z1.d, z3.d +; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll index c0aa162b19b77d..13fcd94ea8a260 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -129,11 +129,11 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) { define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) { ; CHECK-LABEL: store_trunc_v2i256i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl1 -; CHECK-NEXT: ldr d0, [x0, #32] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d -; CHECK-NEXT: str q1, [x1] +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: splice z0.d, p0, { z0.d, z1.d } +; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: store_trunc_v2i256i64: diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll index 77aaeeadcfc2f0..9d241f6f927e11 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s +; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s ; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE @@ -13,11 +13,11 @@ target triple = "aarch64-unknown-linux-gnu" define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v16i16_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: uzp1 z3.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z2.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -69,18 +69,18 @@ define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind { define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i16_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b -; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b +; CHECK-NEXT: splice z2.b, p0, { z4.b, z5.b } +; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b } ; CHECK-NEXT: add z1.b, z2.b, z2.b -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8: @@ -216,27 +216,27 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind { define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v64i16_v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #64] +; CHECK-NEXT: ldp q1, q0, [x0, #64] ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: ldp q6, q7, [x0, #32] -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b -; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b -; CHECK-NEXT: splice z4.b, p0, z4.b, z5.b -; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b +; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q4, q5, [x0] +; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b +; CHECK-NEXT: splice z0.b, p0, { z6.b, z7.b } +; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b +; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b } +; CHECK-NEXT: splice z2.b, p0, { z2.b, z3.b } +; CHECK-NEXT: splice z3.b, p0, { z4.b, z5.b } ; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: add z2.b, z2.b, z2.b -; CHECK-NEXT: add z1.b, z4.b, z4.b -; CHECK-NEXT: add z3.b, z6.b, z6.b +; CHECK-NEXT: add z3.b, z3.b, z3.b ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret @@ -527,49 +527,49 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #192] ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #64] -; CHECK-NEXT: ldp q16, q17, [x0, #224] -; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: ldp q20, q21, [x0, #160] -; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z17.b, z3.b, z3.b +; CHECK-NEXT: ldp q3, q18, [x0, #224] +; CHECK-NEXT: uzp1 z16.b, z2.b, z2.b +; CHECK-NEXT: ldp q2, q19, [x0, #128] ; CHECK-NEXT: ldp q0, q1, [x0, #32] -; CHECK-NEXT: uzp1 z17.b, z17.b, z17.b -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: uzp1 z16.b, z16.b, z16.b -; CHECK-NEXT: ldp q18, q19, [x0, #128] -; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b -; CHECK-NEXT: uzp1 z3.b, z21.b, z21.b -; CHECK-NEXT: uzp1 z20.b, z20.b, z20.b -; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b -; CHECK-NEXT: ldp q21, q22, [x0] -; CHECK-NEXT: splice z16.b, p0, z16.b, z17.b +; CHECK-NEXT: uzp1 z21.b, z18.b, z18.b +; CHECK-NEXT: ldp q18, q22, [x0, #160] +; CHECK-NEXT: uzp1 z20.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z24.b, z19.b, z19.b +; CHECK-NEXT: ldp q3, q19, [x0, #96] +; CHECK-NEXT: uzp1 z23.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z26.b, z22.b, z22.b +; CHECK-NEXT: splice z2.b, p0, { z16.b, z17.b } +; CHECK-NEXT: uzp1 z17.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z25.b, z18.b, z18.b +; CHECK-NEXT: splice z7.b, p0, { z20.b, z21.b } +; CHECK-NEXT: uzp1 z21.b, z5.b, z5.b ; CHECK-NEXT: uzp1 z19.b, z19.b, z19.b -; CHECK-NEXT: uzp1 z18.b, z18.b, z18.b -; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b -; CHECK-NEXT: splice z20.b, p0, z20.b, z3.b -; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b -; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b -; CHECK-NEXT: uzp1 z5.b, z22.b, z22.b -; CHECK-NEXT: uzp1 z7.b, z21.b, z21.b -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: splice z18.b, p0, z18.b, z19.b -; CHECK-NEXT: add z2.b, z2.b, z2.b -; CHECK-NEXT: splice z4.b, p0, z4.b, z3.b -; CHECK-NEXT: add z3.b, z16.b, z16.b -; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b -; CHECK-NEXT: add z1.b, z20.b, z20.b -; CHECK-NEXT: add z5.b, z18.b, z18.b -; CHECK-NEXT: stp q2, q3, [x1, #96] -; CHECK-NEXT: add z2.b, z6.b, z6.b +; CHECK-NEXT: uzp1 z20.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z5.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z16.b, z6.b, z6.b +; CHECK-NEXT: splice z6.b, p0, { z23.b, z24.b } +; CHECK-NEXT: uzp1 z18.b, z3.b, z3.b +; CHECK-NEXT: splice z3.b, p0, { z25.b, z26.b } +; CHECK-NEXT: uzp1 z4.b, z0.b, z0.b +; CHECK-NEXT: add z0.b, z2.b, z2.b +; CHECK-NEXT: add z7.b, z7.b, z7.b +; CHECK-NEXT: splice z1.b, p0, { z16.b, z17.b } +; CHECK-NEXT: splice z2.b, p0, { z18.b, z19.b } +; CHECK-NEXT: splice z16.b, p0, { z20.b, z21.b } +; CHECK-NEXT: splice z4.b, p0, { z4.b, z5.b } +; CHECK-NEXT: add z6.b, z6.b, z6.b +; CHECK-NEXT: add z3.b, z3.b, z3.b +; CHECK-NEXT: stp q0, q7, [x1, #96] +; CHECK-NEXT: add z0.b, z1.b, z1.b +; CHECK-NEXT: add z1.b, z2.b, z2.b +; CHECK-NEXT: add z2.b, z16.b, z16.b +; CHECK-NEXT: stp q6, q3, [x1, #64] ; CHECK-NEXT: add z3.b, z4.b, z4.b -; CHECK-NEXT: add z4.b, z7.b, z7.b -; CHECK-NEXT: add z0.b, z0.b, z0.b -; CHECK-NEXT: stp q5, q1, [x1, #64] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q4, q0, [x1] +; CHECK-NEXT: stp q0, q1, [x1, #32] +; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8: @@ -1181,11 +1181,11 @@ define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind { define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v8i32_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h +; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h } ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -1219,17 +1219,17 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b +; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -1277,32 +1277,32 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind { define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i32_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #96] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q2, q3, [x0, #32] ; CHECK-NEXT: ldp q4, q5, [x0, #64] ; CHECK-NEXT: ldp q6, q7, [x0] -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h +; CHECK-NEXT: uzp1 z17.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z16.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z19.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z18.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h +; CHECK-NEXT: splice z4.h, p0, { z16.h, z17.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: splice z5.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b -; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b -; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b -; CHECK-NEXT: add z0.b, z2.b, z2.b -; CHECK-NEXT: add z1.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z7.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z6.b, z1.b, z1.b +; CHECK-NEXT: splice z0.b, p0, { z2.b, z3.b } +; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b } +; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret ; @@ -1429,56 +1429,56 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #160] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldp q4, q5, [x0, #128] +; CHECK-NEXT: ldp q4, q5, [x0, #96] +; CHECK-NEXT: ldp q6, q7, [x0] +; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h +; CHECK-NEXT: ldp q3, q18, [x0, #128] +; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h +; CHECK-NEXT: ldp q2, q19, [x0, #192] ; CHECK-NEXT: ldp q0, q1, [x0, #64] -; CHECK-NEXT: ldp q6, q7, [x0, #96] -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: ldp q16, q17, [x0] -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: ldp q18, q19, [x0, #192] -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: ldp q20, q21, [x0, #224] -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: ldp q22, q23, [x0, #32] -; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h -; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h -; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h -; CHECK-NEXT: uzp1 z3.h, z21.h, z21.h -; CHECK-NEXT: uzp1 z5.h, z20.h, z20.h -; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z20.h, z23.h, z23.h -; CHECK-NEXT: uzp1 z21.h, z22.h, z22.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h -; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h -; CHECK-NEXT: splice z21.h, p0, z21.h, z20.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h +; CHECK-NEXT: ldp q18, q22, [x0, #224] +; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h +; CHECK-NEXT: ldp q3, q23, [x0, #32] +; CHECK-NEXT: splice z16.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z27.h, z19.h, z19.h +; CHECK-NEXT: uzp1 z25.h, z22.h, z22.h +; CHECK-NEXT: uzp1 z26.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z24.h, z18.h, z18.h +; CHECK-NEXT: uzp1 z18.h, z23.h, z23.h +; CHECK-NEXT: uzp1 z23.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z3.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z22.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h +; CHECK-NEXT: splice z1.h, p0, { z20.h, z21.h } +; CHECK-NEXT: splice z6.h, p0, { z24.h, z25.h } +; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z26.h, z27.h } +; CHECK-NEXT: splice z7.h, p0, { z17.h, z18.h } +; CHECK-NEXT: uzp1 z17.b, z16.b, z16.b +; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h } +; CHECK-NEXT: splice z3.h, p0, { z22.h, z23.h } +; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z16.b, z1.b, z1.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z4.b, z18.b, z18.b -; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z7.b, z16.b, z16.b -; CHECK-NEXT: uzp1 z5.b, z21.b, z21.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z6.b, z6.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: splice z4.b, p0, z4.b, z3.b -; CHECK-NEXT: splice z7.b, p0, z7.b, z5.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b -; CHECK-NEXT: add z1.b, z2.b, z2.b -; CHECK-NEXT: add z2.b, z4.b, z4.b -; CHECK-NEXT: add z3.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z1.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b +; CHECK-NEXT: splice z7.b, p0, { z16.b, z17.b } +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: splice z4.b, p0, { z5.b, z6.b } +; CHECK-NEXT: splice z0.b, p0, { z0.b, z1.b } +; CHECK-NEXT: splice z1.b, p0, { z2.b, z3.b } +; CHECK-NEXT: add z2.b, z7.b, z7.b +; CHECK-NEXT: add z3.b, z4.b, z4.b ; CHECK-NEXT: add z0.b, z0.b, z0.b -; CHECK-NEXT: stp q1, q2, [x1, #32] -; CHECK-NEXT: stp q3, q0, [x1] +; CHECK-NEXT: add z1.b, z1.b, z1.b +; CHECK-NEXT: stp q2, q3, [x1, #32] +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8: @@ -1765,11 +1765,11 @@ define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind { define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind { ; CHECK-LABEL: trunc_v8i32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z2.h, z1.h, z1.h +; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -1801,18 +1801,18 @@ define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind { define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v16i32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h +; CHECK-NEXT: splice z2.h, p0, { z4.h, z5.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: add z1.h, z2.h, z2.h -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16: @@ -1877,27 +1877,27 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind { define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i32_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #64] +; CHECK-NEXT: ldp q1, q0, [x0, #64] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: ldp q6, q7, [x0, #32] -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h +; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q4, q5, [x0] +; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z6.h, z7.h } +; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h +; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h } +; CHECK-NEXT: splice z2.h, p0, { z2.h, z3.h } +; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h } ; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: add z2.h, z2.h, z2.h -; CHECK-NEXT: add z1.h, z4.h, z4.h -; CHECK-NEXT: add z3.h, z6.h, z6.h +; CHECK-NEXT: add z3.h, z3.h, z3.h ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret @@ -2027,49 +2027,49 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #192] ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #64] -; CHECK-NEXT: ldp q16, q17, [x0, #224] -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: ldp q20, q21, [x0, #160] -; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z17.h, z3.h, z3.h +; CHECK-NEXT: ldp q3, q18, [x0, #224] +; CHECK-NEXT: uzp1 z16.h, z2.h, z2.h +; CHECK-NEXT: ldp q2, q19, [x0, #128] ; CHECK-NEXT: ldp q0, q1, [x0, #32] -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h -; CHECK-NEXT: ldp q18, q19, [x0, #128] -; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h -; CHECK-NEXT: uzp1 z3.h, z21.h, z21.h -; CHECK-NEXT: uzp1 z20.h, z20.h, z20.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: ldp q21, q22, [x0] -; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h +; CHECK-NEXT: uzp1 z21.h, z18.h, z18.h +; CHECK-NEXT: ldp q18, q22, [x0, #160] +; CHECK-NEXT: uzp1 z20.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z24.h, z19.h, z19.h +; CHECK-NEXT: ldp q3, q19, [x0, #96] +; CHECK-NEXT: uzp1 z23.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z26.h, z22.h, z22.h +; CHECK-NEXT: splice z2.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z17.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z25.h, z18.h, z18.h +; CHECK-NEXT: splice z7.h, p0, { z20.h, z21.h } +; CHECK-NEXT: uzp1 z21.h, z5.h, z5.h ; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h -; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z20.h, p0, z20.h, z3.h -; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h -; CHECK-NEXT: uzp1 z5.h, z22.h, z22.h -; CHECK-NEXT: uzp1 z7.h, z21.h, z21.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h -; CHECK-NEXT: add z2.h, z2.h, z2.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h -; CHECK-NEXT: add z3.h, z16.h, z16.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: add z1.h, z20.h, z20.h -; CHECK-NEXT: add z5.h, z18.h, z18.h -; CHECK-NEXT: stp q2, q3, [x1, #96] -; CHECK-NEXT: add z2.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z20.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z5.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z16.h, z6.h, z6.h +; CHECK-NEXT: splice z6.h, p0, { z23.h, z24.h } +; CHECK-NEXT: uzp1 z18.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, { z25.h, z26.h } +; CHECK-NEXT: uzp1 z4.h, z0.h, z0.h +; CHECK-NEXT: add z0.h, z2.h, z2.h +; CHECK-NEXT: add z7.h, z7.h, z7.h +; CHECK-NEXT: splice z1.h, p0, { z16.h, z17.h } +; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z16.h, p0, { z20.h, z21.h } +; CHECK-NEXT: splice z4.h, p0, { z4.h, z5.h } +; CHECK-NEXT: add z6.h, z6.h, z6.h +; CHECK-NEXT: add z3.h, z3.h, z3.h +; CHECK-NEXT: stp q0, q7, [x1, #96] +; CHECK-NEXT: add z0.h, z1.h, z1.h +; CHECK-NEXT: add z1.h, z2.h, z2.h +; CHECK-NEXT: add z2.h, z16.h, z16.h +; CHECK-NEXT: stp q6, q3, [x1, #64] ; CHECK-NEXT: add z3.h, z4.h, z4.h -; CHECK-NEXT: add z4.h, z7.h, z7.h -; CHECK-NEXT: add z0.h, z0.h, z0.h -; CHECK-NEXT: stp q5, q1, [x1, #64] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q4, q0, [x1] +; CHECK-NEXT: stp q0, q1, [x1, #32] +; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16: @@ -2360,11 +2360,11 @@ define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind { define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v4i64_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s +; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s } ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -2392,18 +2392,18 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s +; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s } +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret ; @@ -2439,34 +2439,34 @@ define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind { define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind { ; CHECK-LABEL: trunc_v16i64_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #96] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q2, q3, [x0, #32] ; CHECK-NEXT: ldp q4, q5, [x0, #64] ; CHECK-NEXT: ldp q6, q7, [x0] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s +; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z19.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z1.s, z5.s, z5.s +; CHECK-NEXT: uzp1 z18.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z0.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s +; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s } +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } +; CHECK-NEXT: splice z1.s, p0, { z18.s, z19.s } +; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s } ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h +; CHECK-NEXT: splice z2.h, p0, { z3.h, z4.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b +; CHECK-NEXT: splice z0.b, p0, { z1.b, z2.b } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -2523,62 +2523,62 @@ define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind { define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v32i64_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q5, q6, [x0, #224] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0, #224] -; CHECK-NEXT: ldp q4, q5, [x0, #32] -; CHECK-NEXT: ldp q6, q7, [x0, #64] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: ldp q16, q17, [x0, #192] -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: ldp q18, q19, [x0, #128] -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: ldp q20, q21, [x0, #160] -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s -; CHECK-NEXT: ldp q22, q23, [x0, #96] -; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s -; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s -; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s -; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s -; CHECK-NEXT: uzp1 z21.s, z21.s, z21.s -; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: uzp1 z23.s, z23.s, z23.s -; CHECK-NEXT: uzp1 z22.s, z22.s, z22.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s -; CHECK-NEXT: splice z20.s, p0, z20.s, z21.s -; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s -; CHECK-NEXT: splice z22.s, p0, z22.s, z23.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s -; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: ldp q2, q3, [x0, #32] +; CHECK-NEXT: ldp q4, q7, [x0, #64] +; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s +; CHECK-NEXT: ldp q6, q18, [x0, #192] +; CHECK-NEXT: uzp1 z16.s, z5.s, z5.s +; CHECK-NEXT: ldp q5, q19, [x0, #128] +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s +; CHECK-NEXT: ldp q18, q22, [x0, #160] +; CHECK-NEXT: uzp1 z20.s, z6.s, z6.s +; CHECK-NEXT: ldp q6, q23, [x0, #96] +; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s } +; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s +; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s +; CHECK-NEXT: uzp1 z26.s, z5.s, z5.s +; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s +; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s +; CHECK-NEXT: uzp1 z23.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z17.s, z6.s, z6.s +; CHECK-NEXT: uzp1 z6.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z22.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z5.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s +; CHECK-NEXT: splice z3.s, p0, { z20.s, z21.s } +; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, { z24.s, z25.s } +; CHECK-NEXT: splice z7.s, p0, { z26.s, z27.s } +; CHECK-NEXT: splice z4.s, p0, { z17.s, z18.s } +; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h +; CHECK-NEXT: splice z5.s, p0, { z5.s, z6.s } +; CHECK-NEXT: splice z6.s, p0, { z22.s, z23.s } +; CHECK-NEXT: splice z1.s, p0, { z1.s, z2.s } +; CHECK-NEXT: uzp1 z16.h, z3.h, z3.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z2.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z3.h, z20.h, z20.h -; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h -; CHECK-NEXT: uzp1 z7.h, z22.h, z22.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h -; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z4.h +; CHECK-NEXT: uzp1 z3.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z19.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z18.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: splice z0.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z4.h, z1.h, z1.h +; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } +; CHECK-NEXT: splice z2.h, p0, { z18.h, z19.h } +; CHECK-NEXT: splice z3.h, p0, { z4.h, z5.h } +; CHECK-NEXT: uzp1 z5.b, z0.b, z0.b ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b -; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b -; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z3.b -; CHECK-NEXT: add z1.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z4.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z7.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z6.b, z3.b, z3.b +; CHECK-NEXT: splice z0.b, p0, { z4.b, z5.b } +; CHECK-NEXT: splice z1.b, p0, { z6.b, z7.b } ; CHECK-NEXT: add z0.b, z0.b, z0.b -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: add z1.b, z1.b, z1.b +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8: @@ -2731,11 +2731,11 @@ define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind { define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind { ; CHECK-LABEL: trunc_v4i64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s +; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s } ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -2763,17 +2763,17 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s +; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s } +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, { z1.h, z2.h } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -2810,32 +2810,32 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind { define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v16i64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #96] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q2, q3, [x0, #32] ; CHECK-NEXT: ldp q4, q5, [x0, #64] ; CHECK-NEXT: ldp q6, q7, [x0] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s +; CHECK-NEXT: uzp1 z17.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z16.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z1.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z19.s, z5.s, z5.s +; CHECK-NEXT: uzp1 z0.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z18.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s +; CHECK-NEXT: splice z4.s, p0, { z16.s, z17.s } +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } +; CHECK-NEXT: splice z5.s, p0, { z18.s, z19.s } +; CHECK-NEXT: splice z1.s, p0, { z2.s, z3.s } ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h -; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h -; CHECK-NEXT: add z0.h, z2.h, z2.h -; CHECK-NEXT: add z1.h, z3.h, z3.h +; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z7.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z2.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z6.h, z1.h, z1.h +; CHECK-NEXT: splice z0.h, p0, { z2.h, z3.h } +; CHECK-NEXT: splice z1.h, p0, { z6.h, z7.h } +; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret ; @@ -2915,56 +2915,56 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #160] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q4, q5, [x0, #128] +; CHECK-NEXT: ldp q4, q5, [x0, #96] +; CHECK-NEXT: ldp q6, q7, [x0] +; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s +; CHECK-NEXT: ldp q3, q18, [x0, #128] +; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s +; CHECK-NEXT: ldp q2, q19, [x0, #192] ; CHECK-NEXT: ldp q0, q1, [x0, #64] -; CHECK-NEXT: ldp q6, q7, [x0, #96] -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: ldp q16, q17, [x0] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: ldp q18, q19, [x0, #192] -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s -; CHECK-NEXT: ldp q20, q21, [x0, #224] -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: ldp q22, q23, [x0, #32] -; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s -; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s -; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s -; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s -; CHECK-NEXT: uzp1 z3.s, z21.s, z21.s -; CHECK-NEXT: uzp1 z5.s, z20.s, z20.s -; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s -; CHECK-NEXT: uzp1 z20.s, z23.s, z23.s -; CHECK-NEXT: uzp1 z21.s, z22.s, z22.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s -; CHECK-NEXT: splice z5.s, p0, z5.s, z3.s -; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s -; CHECK-NEXT: splice z21.s, p0, z21.s, z20.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s +; CHECK-NEXT: ldp q18, q22, [x0, #224] +; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s +; CHECK-NEXT: ldp q3, q23, [x0, #32] +; CHECK-NEXT: splice z16.s, p0, { z16.s, z17.s } +; CHECK-NEXT: uzp1 z27.s, z19.s, z19.s +; CHECK-NEXT: uzp1 z25.s, z22.s, z22.s +; CHECK-NEXT: uzp1 z26.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z24.s, z18.s, z18.s +; CHECK-NEXT: uzp1 z18.s, z23.s, z23.s +; CHECK-NEXT: uzp1 z23.s, z5.s, z5.s +; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z3.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z22.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s +; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s +; CHECK-NEXT: splice z1.s, p0, { z20.s, z21.s } +; CHECK-NEXT: splice z6.s, p0, { z24.s, z25.s } +; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, { z26.s, z27.s } +; CHECK-NEXT: splice z7.s, p0, { z17.s, z18.s } +; CHECK-NEXT: uzp1 z17.h, z16.h, z16.h +; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s } +; CHECK-NEXT: splice z3.s, p0, { z22.s, z23.s } +; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s } +; CHECK-NEXT: uzp1 z16.h, z1.h, z1.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z4.h, z18.h, z18.h -; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z5.h, z21.h, z21.h -; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h -; CHECK-NEXT: uzp1 z1.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h -; CHECK-NEXT: add z1.h, z2.h, z2.h -; CHECK-NEXT: add z2.h, z4.h, z4.h -; CHECK-NEXT: add z3.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z5.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h +; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: splice z7.h, p0, { z16.h, z17.h } +; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h +; CHECK-NEXT: splice z4.h, p0, { z5.h, z6.h } +; CHECK-NEXT: splice z0.h, p0, { z0.h, z1.h } +; CHECK-NEXT: splice z1.h, p0, { z2.h, z3.h } +; CHECK-NEXT: add z2.h, z7.h, z7.h +; CHECK-NEXT: add z3.h, z4.h, z4.h ; CHECK-NEXT: add z0.h, z0.h, z0.h -; CHECK-NEXT: stp q1, q2, [x1, #32] -; CHECK-NEXT: stp q3, q0, [x1] +; CHECK-NEXT: add z1.h, z1.h, z1.h +; CHECK-NEXT: stp q2, q3, [x1, #32] +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16: @@ -3118,11 +3118,11 @@ define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind { define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind { ; CHECK-LABEL: trunc_v4i64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: uzp1 z3.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z2.s, z1.s, z1.s +; CHECK-NEXT: splice z0.s, p0, { z2.s, z3.s } ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret ; @@ -3146,18 +3146,18 @@ define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind { define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v8i64_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s +; CHECK-NEXT: splice z2.s, p0, { z4.s, z5.s } +; CHECK-NEXT: splice z0.s, p0, { z0.s, z1.s } ; CHECK-NEXT: add z1.s, z2.s, z2.s -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32: @@ -3202,27 +3202,27 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind { define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind { ; CHECK-LABEL: trunc_v16i64_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #64] +; CHECK-NEXT: ldp q1, q0, [x0, #64] ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: ldp q6, q7, [x0, #32] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s +; CHECK-NEXT: ldp q2, q3, [x0, #96] +; CHECK-NEXT: ldp q4, q5, [x0] +; CHECK-NEXT: uzp1 z7.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z6.s, z1.s, z1.s +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s +; CHECK-NEXT: uzp1 z2.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z5.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, { z6.s, z7.s } +; CHECK-NEXT: uzp1 z4.s, z1.s, z1.s +; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s } +; CHECK-NEXT: splice z2.s, p0, { z2.s, z3.s } +; CHECK-NEXT: splice z3.s, p0, { z4.s, z5.s } ; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: add z1.s, z1.s, z1.s ; CHECK-NEXT: add z2.s, z2.s, z2.s -; CHECK-NEXT: add z1.s, z4.s, z4.s -; CHECK-NEXT: add z3.s, z6.s, z6.s +; CHECK-NEXT: add z3.s, z3.s, z3.s ; CHECK-NEXT: stp q0, q1, [x1, #32] ; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret @@ -3297,49 +3297,49 @@ define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q2, q3, [x0, #192] ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #64] -; CHECK-NEXT: ldp q16, q17, [x0, #224] -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: ldp q20, q21, [x0, #160] -; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z17.s, z3.s, z3.s +; CHECK-NEXT: ldp q3, q18, [x0, #224] +; CHECK-NEXT: uzp1 z16.s, z2.s, z2.s +; CHECK-NEXT: ldp q2, q19, [x0, #128] ; CHECK-NEXT: ldp q0, q1, [x0, #32] -; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s -; CHECK-NEXT: ldp q4, q5, [x0, #96] -; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s -; CHECK-NEXT: ldp q18, q19, [x0, #128] -; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s -; CHECK-NEXT: uzp1 z3.s, z21.s, z21.s -; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s -; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s -; CHECK-NEXT: ldp q21, q22, [x0] -; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s +; CHECK-NEXT: uzp1 z21.s, z18.s, z18.s +; CHECK-NEXT: ldp q18, q22, [x0, #160] +; CHECK-NEXT: uzp1 z20.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z24.s, z19.s, z19.s +; CHECK-NEXT: ldp q3, q19, [x0, #96] +; CHECK-NEXT: uzp1 z23.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z26.s, z22.s, z22.s +; CHECK-NEXT: splice z2.s, p0, { z16.s, z17.s } +; CHECK-NEXT: uzp1 z17.s, z7.s, z7.s +; CHECK-NEXT: uzp1 z25.s, z18.s, z18.s +; CHECK-NEXT: splice z7.s, p0, { z20.s, z21.s } +; CHECK-NEXT: uzp1 z21.s, z5.s, z5.s ; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s -; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: splice z20.s, p0, z20.s, z3.s -; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s -; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s -; CHECK-NEXT: uzp1 z5.s, z22.s, z22.s -; CHECK-NEXT: uzp1 z7.s, z21.s, z21.s -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s -; CHECK-NEXT: add z2.s, z2.s, z2.s -; CHECK-NEXT: splice z4.s, p0, z4.s, z3.s -; CHECK-NEXT: add z3.s, z16.s, z16.s -; CHECK-NEXT: splice z7.s, p0, z7.s, z5.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s -; CHECK-NEXT: add z1.s, z20.s, z20.s -; CHECK-NEXT: add z5.s, z18.s, z18.s -; CHECK-NEXT: stp q2, q3, [x1, #96] -; CHECK-NEXT: add z2.s, z6.s, z6.s +; CHECK-NEXT: uzp1 z20.s, z4.s, z4.s +; CHECK-NEXT: uzp1 z5.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z16.s, z6.s, z6.s +; CHECK-NEXT: splice z6.s, p0, { z23.s, z24.s } +; CHECK-NEXT: uzp1 z18.s, z3.s, z3.s +; CHECK-NEXT: splice z3.s, p0, { z25.s, z26.s } +; CHECK-NEXT: uzp1 z4.s, z0.s, z0.s +; CHECK-NEXT: add z0.s, z2.s, z2.s +; CHECK-NEXT: add z7.s, z7.s, z7.s +; CHECK-NEXT: splice z1.s, p0, { z16.s, z17.s } +; CHECK-NEXT: splice z2.s, p0, { z18.s, z19.s } +; CHECK-NEXT: splice z16.s, p0, { z20.s, z21.s } +; CHECK-NEXT: splice z4.s, p0, { z4.s, z5.s } +; CHECK-NEXT: add z6.s, z6.s, z6.s +; CHECK-NEXT: add z3.s, z3.s, z3.s +; CHECK-NEXT: stp q0, q7, [x1, #96] +; CHECK-NEXT: add z0.s, z1.s, z1.s +; CHECK-NEXT: add z1.s, z2.s, z2.s +; CHECK-NEXT: add z2.s, z16.s, z16.s +; CHECK-NEXT: stp q6, q3, [x1, #64] ; CHECK-NEXT: add z3.s, z4.s, z4.s -; CHECK-NEXT: add z4.s, z7.s, z7.s -; CHECK-NEXT: add z0.s, z0.s, z0.s -; CHECK-NEXT: stp q5, q1, [x1, #64] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q4, q0, [x1] +; CHECK-NEXT: stp q0, q1, [x1, #32] +; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll index 3438cbdd476d85..4b0ff1b2eb4704 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll @@ -24,9 +24,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -50,9 +53,12 @@ define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrsp ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -82,9 +88,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX11-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX11-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX11-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX11-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX11-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX11-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX11-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX11-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX11-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX11-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX11-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX11-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX11-NEXT: $vgpr10 = COPY [[COPY5]](s32) @@ -108,9 +117,12 @@ define amdgpu_cs_chain void @chain_preserve_call(<3 x i32> inreg %sgpr, { i32, p ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[GV1:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @callee_preserve ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) - ; GFX10-NEXT: $sgpr0 = COPY [[UV]](s32) - ; GFX10-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; GFX10-NEXT: $sgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; GFX10-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; GFX10-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) ; GFX10-NEXT: $vgpr8 = COPY [[COPY3]](s32) ; GFX10-NEXT: $vgpr9 = COPY [[COPY4]](p5) ; GFX10-NEXT: $vgpr10 = COPY [[COPY5]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 5effd24a752088..adad38de380d7d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -50,7 +50,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -99,8 +100,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index c3694158e7b971..96c3575e3190c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -942,7 +942,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg - ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[C]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -3984,8 +3985,10 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_struct_i8_i32_inreg ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[LOAD1]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT1]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[LOAD2]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 @@ -5309,7 +5312,8 @@ define void @test_call_external_void_func_i16_inreg(i16 inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5354,7 +5358,8 @@ define void @test_call_external_void_func_i32_inreg(i32 inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5402,8 +5407,10 @@ define void @test_call_external_void_func_i64_inreg(i64 inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5451,8 +5458,10 @@ define void @test_call_external_void_func_v2i32_inreg(<2 x i32> inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5499,7 +5508,8 @@ define void @test_call_external_void_func_f16_inreg(half inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5546,7 +5556,8 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 { ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) - ; CHECK-NEXT: $sgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[ANYEXT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5591,7 +5602,8 @@ define void @test_call_external_void_func_f32_inreg(float inreg %arg) #0 { ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5639,8 +5651,10 @@ define void @test_call_external_void_func_f64_inreg(double inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5685,7 +5699,9 @@ define void @test_call_external_void_func_v2f16_inreg(<2 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY9]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5738,8 +5754,12 @@ define void @test_call_external_void_func_v3f16_inreg(<3 x half> inreg %arg) #0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5787,8 +5807,12 @@ define void @test_call_external_void_func_v4f16_inreg(<4 x half> inreg %arg) #0 ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5836,8 +5860,10 @@ define void @test_call_external_void_func_p0_inreg(ptr inreg %arg) #0 { ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p0) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5885,8 +5911,10 @@ define void @test_call_external_void_func_p1_inreg(ptr addrspace(1) inreg %arg) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) @@ -5931,7 +5959,9 @@ define void @test_call_external_void_func_p3_inreg(ptr addrspace(3) inreg %arg) ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: $sgpr0 = COPY [[COPY9]](p3) + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY9]](p3) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -5983,10 +6013,14 @@ define void @test_call_external_void_func_v2p1_inreg(<2 x ptr addrspace(1)> inre ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p1>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $sgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $sgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV2]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INTRINSIC_CONVERGENT2]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV3]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INTRINSIC_CONVERGENT3]](s32) ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) @@ -6034,8 +6068,10 @@ define void @test_call_external_void_func_v2p5_inreg(<2 x ptr addrspace(5)> inre ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x p5>) - ; CHECK-NEXT: $sgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $sgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32) + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll index 06f66e05d6747e..8ca3e8255b6340 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll @@ -501,6 +501,79 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { unreachable } +; Chain call with SGPR arguments that we cannot prove are uniform. +define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) { +; GISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX11: ; %bb.0: +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; GISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; GISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; GISEL-GFX10: ; %bb.0: +; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; GISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX11: ; %bb.0: +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4 +; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] +; +; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform: +; DAGISEL-GFX10: ; %bb.0: +; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v4 +; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v5 +; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi +; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo +; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) +; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 +; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s0 +; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 +; DAGISEL-GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] +; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] +; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 +; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] + call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) + unreachable +} + define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { ; GISEL-GFX11-LABEL: chain_to_chain: ; GISEL-GFX11: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index f54ea615ca6645..c57a35aa1880db 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -17,7 +17,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_bfe_u32 v2, v2, 0, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 +; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_lshrrev_b32_e32 v2, v3, v2 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -36,7 +38,9 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_bfe_u32 v2, v3, 0, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 +; VI-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() @@ -215,7 +219,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; SI-NEXT: buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 glc ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 s[2:3], s[6:7] -; SI-NEXT: v_bfe_i32 v2, v2, 0, v3 +; SI-NEXT: v_sub_i32_e32 v3, vcc, 32, v3 +; SI-NEXT: v_lshlrev_b32_e32 v2, v3, v2 +; SI-NEXT: v_ashrrev_i32_e32 v2, v3, v2 ; SI-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; SI-NEXT: s_endpgm ; @@ -234,7 +240,9 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_bfe_i32 v2, v3, 0, v4 +; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4 +; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3 +; VI-NEXT: v_ashrrev_i32_e32 v2, v2, v3 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir index 2cb84c7ef4637d..072cc3a60a60ca 100644 --- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir +++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-nophis.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s # There are no phis in this testcase. Early tail duplication introduces them, # so the NoPHIs property needs to be cleared to avoid verifier errors diff --git a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir index 41c6906b3c85ad..8132fa4df89eee 100644 --- a/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir +++ b/llvm/test/CodeGen/AMDGPU/early-tailduplicator-terminator.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=early-tailduplication -o - %s | FileCheck %s # Early tail duplication should not merge bb.6 into bb.5, adding a # non-terminator (S_SLEEP) after the terminator S_MOV_B32_term. diff --git a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll index 9677ec41ce268a..3d9616f02d52d1 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-lowbits.ll @@ -150,11 +150,21 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; ---------------------------------------------------------------------------- ; define i32 @bzhi32_d0(i32 %val, i32 %numlowbits) nounwind { -; GCN-LABEL: bzhi32_d0: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_bfe_u32 v0, v0, 0, v1 -; GCN-NEXT: s_setpc_b64 s[30:31] +; SI-LABEL: bzhi32_d0: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_sub_i32_e32 v1, vcc, 32, v1 +; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: bzhi32_d0: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_sub_u32_e32 v1, vcc, 32, v1 +; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 +; VI-NEXT: v_lshrrev_b32_e32 v0, v1, v0 +; VI-NEXT: s_setpc_b64 s[30:31] %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll index 469d0453b9dfb1..ae309f3a614d50 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll @@ -6,7 +6,6 @@ declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) -declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: chain_to_chain @@ -20,9 +19,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +35,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +49,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +147,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +163,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +177,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +275,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -262,8 +291,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +305,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +403,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +419,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +433,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +534,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +563,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -600,6 +659,93 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, unreachable } +; Indirect with callee that we cannot prove is uniform. +define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 %vgpr) { + ; GISEL-GFX11-LABEL: name: nonuniform_callee + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; GISEL-GFX10-LABEL: name: nonuniform_callee + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: nonuniform_callee + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; DAGISEL-GFX10-LABEL: name: nonuniform_callee + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B32_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $vgpr8 + call void(ptr, i32, i32, i32, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 -1, i32 inreg %sgpr, i32 %vgpr, i32 0) + unreachable +} + define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: non_imm_exec ; GISEL-GFX11: bb.1 (%ir-block.0): @@ -613,9 +759,15 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY6]] @@ -623,8 +775,8 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY8]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W32 [[COPY11]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -638,20 +790,26 @@ define amdgpu_cs_chain void @non_imm_exec(i32 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY7]] - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY8]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY9]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[COPY12]], @callee, 0, [[COPY]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -734,9 +892,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY8]] @@ -758,15 +922,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i32 i ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY9]] - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY13]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W32 [[REG_SEQUENCE]], 0, 0, [[COPY2]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll index 51c28a02b7f821..90707e823c1478 100644 --- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll +++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll @@ -6,7 +6,6 @@ declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }) -declare void @llvm.amdgcn.cs.chain(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: chain_to_chain @@ -20,9 +19,15 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -30,8 +35,8 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -44,20 +49,26 @@ define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -136,9 +147,15 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -146,8 +163,8 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -160,20 +177,26 @@ define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -252,9 +275,15 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -262,8 +291,8 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: chain_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -276,20 +305,26 @@ define amdgpu_cs_chain void @chain_to_chain_preserve(<3 x i32> inreg %sgpr, { i3 ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: chain_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -368,9 +403,15 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY5]] @@ -378,8 +419,8 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY7]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: cs_to_chain_preserve ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -392,20 +433,26 @@ define amdgpu_cs void @cs_to_chain_preserve(<3 x i32> inreg %sgpr, { i32, ptr ad ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY1]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY3]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY4]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY6]] - ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:sgpr_128 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY10]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee_preserve ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee_preserve ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY8]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY11]], @callee_preserve, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: cs_to_chain_preserve ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -487,9 +534,15 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -510,15 +563,21 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect @@ -600,6 +659,92 @@ define amdgpu_cs_chain void @indirect(ptr inreg %callee, <3 x i32> inreg %sgpr, unreachable } +; Indirect with callee that we cannot prove is uniform. +define amdgpu_cs_chain void @nonuniform_callee(ptr %callee, i32 inreg %sgpr, i32 %vgpr) { + ; GISEL-GFX11-LABEL: name: nonuniform_callee + ; GISEL-GFX11: bb.1 (%ir-block.0): + ; GISEL-GFX11-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX11-NEXT: {{ $}} + ; GISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; GISEL-GFX10-LABEL: name: nonuniform_callee + ; GISEL-GFX10: bb.1 (%ir-block.0): + ; GISEL-GFX10-NEXT: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10 + ; GISEL-GFX10-NEXT: {{ $}} + ; GISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; GISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; GISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; GISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE]] + ; GISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; GISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; GISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY2]], 0, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; + ; DAGISEL-GFX11-LABEL: name: nonuniform_callee + ; DAGISEL-GFX11: bb.0 (%ir-block.0): + ; DAGISEL-GFX11-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX11-NEXT: {{ $}} + ; DAGISEL-GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX11-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX11-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX11-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8 + ; + ; DAGISEL-GFX10-LABEL: name: nonuniform_callee + ; DAGISEL-GFX10: bb.0 (%ir-block.0): + ; DAGISEL-GFX10-NEXT: liveins: $vgpr8, $vgpr9, $sgpr0, $vgpr10 + ; DAGISEL-GFX10-NEXT: {{ $}} + ; DAGISEL-GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr10 + ; DAGISEL-GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0 + ; DAGISEL-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr9 + ; DAGISEL-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr8 + ; DAGISEL-GFX10-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY4]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0 + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 killed [[COPY5]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:ccr_sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_1]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_]], %subreg.sub1 + ; DAGISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] + ; DAGISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec + ; DAGISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; DAGISEL-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 -1 + ; DAGISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY7]] + ; DAGISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_2]] + ; DAGISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY]] + ; DAGISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 killed [[REG_SEQUENCE1]], 0, 0, killed [[S_MOV_B64_]], amdgpu_allvgprs, implicit $sgpr48_sgpr49_sgpr50_sgpr51, implicit $sgpr0, implicit $vgpr8 + call void(ptr, i64, i32, i32, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i64 -1, i32 inreg %sgpr, i32 %vgpr, i32 0) + unreachable +} define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) { ; GISEL-GFX11-LABEL: name: non_imm_exec ; GISEL-GFX11: bb.1 (%ir-block.0): @@ -615,9 +760,15 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY7]] @@ -625,8 +776,8 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY9]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX11-NEXT: SI_CS_CHAIN_TC_W64 [[COPY12]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11 ; ; GISEL-GFX10-LABEL: name: non_imm_exec ; GISEL-GFX10: bb.1 (%ir-block.0): @@ -642,20 +793,26 @@ define amdgpu_cs_chain void @non_imm_exec(i64 inreg %exec, <3 x i32> inreg %sgpr ; GISEL-GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY2]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY3]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY2]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY3]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY5]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY6]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY8]] - ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY9]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY12]] ; GISEL-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @callee ; GISEL-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @callee ; GISEL-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1 - ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] - ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY10]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[REG_SEQUENCE1]] + ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[COPY13]], @callee, 0, [[REG_SEQUENCE]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: non_imm_exec ; DAGISEL-GFX11: bb.0 (%ir-block.0): @@ -744,9 +901,15 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX11-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX11-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX11-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX11-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX11-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX11-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX11-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX11-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX11-NEXT: $vgpr10 = COPY [[COPY9]] @@ -770,15 +933,21 @@ define amdgpu_cs_chain void @indirect_with_non_imm_exec(ptr inreg %callee, i64 i ; GISEL-GFX10-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr9 ; GISEL-GFX10-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr10 ; GISEL-GFX10-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY $vgpr11 - ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[COPY4]] - ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[COPY5]] - ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GISEL-GFX10-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GISEL-GFX10-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GISEL-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GISEL-GFX10-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] ; GISEL-GFX10-NEXT: $vgpr8 = COPY [[COPY7]] ; GISEL-GFX10-NEXT: $vgpr9 = COPY [[COPY8]] ; GISEL-GFX10-NEXT: $vgpr10 = COPY [[COPY9]] ; GISEL-GFX10-NEXT: $vgpr11 = COPY [[COPY10]] - ; GISEL-GFX10-NEXT: [[COPY11:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 - ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY11]] + ; GISEL-GFX10-NEXT: [[COPY14:%[0-9]+]]:sgpr_128 = COPY $sgpr48_sgpr49_sgpr50_sgpr51 + ; GISEL-GFX10-NEXT: $sgpr48_sgpr49_sgpr50_sgpr51 = COPY [[COPY14]] ; GISEL-GFX10-NEXT: SI_CS_CHAIN_TC_W64 [[REG_SEQUENCE]], 0, 0, [[REG_SEQUENCE1]], amdgpu_allvgprs, implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $sgpr48_sgpr49_sgpr50_sgpr51 ; ; DAGISEL-GFX11-LABEL: name: indirect_with_non_imm_exec diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll new file mode 100644 index 00000000000000..b0d578e421e280 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invalid-vector-gep.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s + +; Check that invalid IR is not produced on a vector typed +; getelementptr with a scalar alloca pointer base. + +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset() { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[GETELEMENTPTR]], i64 0 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %extractelement = extractelement <4 x ptr addrspace(5)> %getelementptr, i64 0 + store i32 0, ptr addrspace(5) %extractelement + ret void +} + +define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select(i1 %cond) { +; CHECK-LABEL: define amdgpu_kernel void @scalar_alloca_ptr_with_vector_gep_offset_select( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5) +; CHECK-NEXT: [[GETELEMENTPTR0:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[GETELEMENTPTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[ALLOCA]], <4 x i64> +; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[COND]], <4 x ptr addrspace(5)> [[GETELEMENTPTR0]], <4 x ptr addrspace(5)> [[GETELEMENTPTR1]] +; CHECK-NEXT: [[EXTRACTELEMENT:%.*]] = extractelement <4 x ptr addrspace(5)> [[SELECT]], i64 1 +; CHECK-NEXT: store i32 0, ptr addrspace(5) [[EXTRACTELEMENT]], align 4 +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca i32, align 4, addrspace(5) + %getelementptr0 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %getelementptr1 = getelementptr inbounds i8, ptr addrspace(5) %alloca, <4 x i64> + %select = select i1 %cond, <4 x ptr addrspace(5)> %getelementptr0, <4 x ptr addrspace(5)> %getelementptr1 + %extractelement = extractelement <4 x ptr addrspace(5)> %select, i64 1 + store i32 0, ptr addrspace(5) %extractelement + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir index c23c8900096fba..be1a8aceb8c903 100644 --- a/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir +++ b/llvm/test/CodeGen/AMDGPU/stop-tail-duplicate-cfg-intrinsic.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=early-tailduplication -verify-machineinstrs -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes=early-tailduplication -o - %s | FileCheck %s --- name: stop_duplicate_cfg_intrinsic diff --git a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll index fd5f26ba35742f..7147257d27c4b8 100644 --- a/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll +++ b/llvm/test/CodeGen/PowerPC/saddo-ssubo.ll @@ -129,12 +129,11 @@ entry: define i1 @test_ssubo_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_ssubo_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: sub 5, 3, 4 -; CHECK-NEXT: cmpwi 1, 4, 0 -; CHECK-NEXT: cmpw 5, 3 -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: creqv 20, 5, 0 -; CHECK-NEXT: isel 3, 0, 3, 20 +; CHECK-NEXT: sub 3, 3, 4 +; CHECK-NEXT: extsw 4, 3 +; CHECK-NEXT: xor 3, 4, 3 +; CHECK-NEXT: addic 4, 3, -1 +; CHECK-NEXT: subfe 3, 4, 3 ; CHECK-NEXT: blr entry: %res = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll index ccfe94ecad286b..54f56eadf00342 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll @@ -559,7 +559,7 @@ define @after_fsrm3( %0, @after_fsrm3( %0, undef, %0, %1, - i64 5, i64 %2) + i64 3, i64 %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_x_f_qf.ll b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_x_f_qf.ll index 3c19616576f55d..fbe1a97c201cf3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_x_f_qf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_x_f_qf.ll @@ -13,10 +13,8 @@ declare @llvm.riscv.sf.vfnrclip.x.f.qf.nxv1i8.nxv1f32.iXLen( define @intrinsic_sf_vfnrclip_x_f_qf_nxv1i8_nxv1f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_sf_vfnrclip_x_f_qf_nxv1i8_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: sf.vfnrclip.x.f.qf v9, v8, fa0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -24,7 +22,7 @@ entry: undef, %0, float %1, - iXLen 0, iXLen %2) + iXLen 7, iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll index dbcee311c6e35f..dfb0ccd982e845 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sf_vfnrclip_xu_f_qf.ll @@ -13,10 +13,8 @@ declare @llvm.riscv.sf.vfnrclip.xu.f.qf.nxv1i8.nxv1f32.iXLen( define @intrinsic_sf_vfnrclip_xu_f_qf_nxv1i8_nxv1f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_sf_vfnrclip_xu_f_qf_nxv1i8_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: sf.vfnrclip.xu.f.qf v9, v8, fa0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret entry: @@ -24,7 +22,7 @@ entry: undef, %0, float %1, - iXLen 0, iXLen %2) + iXLen 7, iXLen %2) ret %a } diff --git a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll index 6057bf38d4c4c4..c68250697c4a7b 100644 --- a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll +++ b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll @@ -1,7 +1,7 @@ ; All OpVariable instructions in a function must be the first instructions in the first block -; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -mtriple=spirv32-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-linux %s -o - -filetype=obj | spirv-val %} ; CHECK-SPIRV: OpFunction ; CHECK-SPIRV-NEXT: OpLabel diff --git a/llvm/test/CodeGen/SPIRV/ShaderBufferImage.ll b/llvm/test/CodeGen/SPIRV/ShaderBufferImage.ll index 3c002e1849b8dd..1f203043e6a164 100644 --- a/llvm/test/CodeGen/SPIRV/ShaderBufferImage.ll +++ b/llvm/test/CodeGen/SPIRV/ShaderBufferImage.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-library %s -o - -filetype=obj | spirv-val %} ; CHECK-NOT: OpCapability ImageBasic diff --git a/llvm/test/CodeGen/SPIRV/ShaderImage.ll b/llvm/test/CodeGen/SPIRV/ShaderImage.ll index 6ac58ce42f950f..9cd5fb33808056 100644 --- a/llvm/test/CodeGen/SPIRV/ShaderImage.ll +++ b/llvm/test/CodeGen/SPIRV/ShaderImage.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-library %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: [[Float:%[0-9]+]] = OpTypeFloat 32 diff --git a/llvm/test/CodeGen/SPIRV/basic_float_types.ll b/llvm/test/CodeGen/SPIRV/basic_float_types.ll index 1c7a8a851f59c6..dfee1ace2205dd 100644 --- a/llvm/test/CodeGen/SPIRV/basic_float_types.ll +++ b/llvm/test/CodeGen/SPIRV/basic_float_types.ll @@ -1,4 +1,3 @@ -; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} diff --git a/llvm/test/CodeGen/SPIRV/basic_int_types.ll b/llvm/test/CodeGen/SPIRV/basic_int_types.ll index bb664568ed842f..e85e0919d17934 100644 --- a/llvm/test/CodeGen/SPIRV/basic_int_types.ll +++ b/llvm/test/CodeGen/SPIRV/basic_int_types.ll @@ -1,6 +1,4 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} - +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} diff --git a/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll b/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll index 3778d897929188..0d7b2b99f64eac 100644 --- a/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll +++ b/llvm/test/CodeGen/SPIRV/basic_int_types_spirvdis.ll @@ -1,5 +1,5 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" ; REQUIRES: spirv-tools -; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - --filetype=obj | spirv-dis | FileCheck %s ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - --filetype=obj | spirv-dis | FileCheck %s ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - --filetype=obj | spirv-dis | FileCheck %s diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll index c84b1c4b06c199..2d8692adf12a2a 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/SV_DispatchThreadID.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-unknown %s -o - -filetype=obj | spirv-val %} ; This file generated from the following command: diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveGetLaneIndex.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveGetLaneIndex.ll index 89a8575fa15991..d0a56854c32f8a 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveGetLaneIndex.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/WaveGetLaneIndex.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-unknown %s -o - -filetype=obj | spirv-val %} ; This file generated from the following command: diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll index 8f1092c2206ed8..c3e894afd710b6 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/abs.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/acos.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/acos.ll index 7c9450267cbe89..1936f6d2720737 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/acos.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/acos.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll index 7c40eed8465a1d..1edd69e2b0d5b0 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/all.ll @@ -1,5 +1,6 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; Make sure spirv operation function calls for all are generated. diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll index 54f5b7774b579e..dc6e9dc2033055 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll @@ -1,5 +1,6 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; Make sure spirv operation function calls for any are generated. diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/asin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/asin.ll index 4d57c6fce77f70..be338f22bf1255 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/asin.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/asin.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan.ll index 65e198d0e71a35..5d352eb80af2d3 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll index bdbfc133efa29b..aba6f7583b6833 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/atan2.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ceil.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ceil.ll index 93677aadffa5e9..2c36459bdac95d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ceil.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/ceil.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cos.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cos.ll index e9e9642354f5a5..937a545cc563cb 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cos.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cos.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cosh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cosh.ll index 1560f9b9bd7605..2d7a4caada7d54 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cosh.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/cosh.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll index 57ec0bda2e1890..d47ec3ec27aa1b 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/countbits.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpMemoryModel Logical GLSL450 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/degrees.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/degrees.ll index 533bcca6f62169..6915362001288b 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/degrees.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/degrees.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp.ll index c1734a264ea042..43bb8e217a6705 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp2.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp2.ll index 4753b7bd9fe5bd..ae6c33cb0c7ef2 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp2.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/exp2.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/floor.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/floor.ll index ea19fa94ea3265..1ecaafc22e6fad 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/floor.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/floor.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll index b1ca34dc504c03..add94601bd1687 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmad.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll index ca0fcfe8d646b6..b202025f5dc83d 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmax.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll index adc563bcea5c6c..77e2ed1748e6ee 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/fmin.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/frac.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/frac.ll index 4c088b6b38103c..41c18b693574f7 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/frac.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/frac.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll new file mode 100644 index 00000000000000..e314361fe41812 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/group_memory_barrier_with_group_sync.ll @@ -0,0 +1,15 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpMemoryModel Logical GLSL450 + +define void @test_group_memory_barrier_with_group_sync() { +entry: + ; CHECK: %[[#TY:]] = OpTypeInt 32 0 + ; CHECK-DAG: %[[#MEM_SEM:]] = OpConstant %[[#TY]] 16 + ; CHECK-DAG: %[[#EXEC_AND_MEM_SCOPE:]] = OpConstant %[[#TY]] 2 + ; CHECK: OpControlBarrier %[[#EXEC_AND_MEM_SCOPE]] %[[#EXEC_AND_MEM_SCOPE]] %[[#MEM_SEM]] + call void @llvm.spv.group.memory.barrier.with.group.sync() + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll index 1be8eb7e651661..a161147c8b9647 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/imad.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/lerp.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/lerp.ll index aa7ad8c74d336c..94272a84bd6392 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/lerp.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/lerp.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; Make sure SPIRV operation function calls for lerp are generated as FMix diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log.ll index f85b20324da515..d5dd92042537a1 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll index 32d63a0c0f1d21..a829422d84ebfa 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log10.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#extinst:]] = OpExtInstImport "GLSL.std.450" @@ -6,35 +7,22 @@ ; CHECK: %[[#float:]] = OpTypeFloat 32 ; CHECK: %[[#v4float:]] = OpTypeVector %[[#float]] 4 ; CHECK: %[[#float_0_30103001:]] = OpConstant %[[#float]] 0.30103000998497009 -; CHECK: %[[#_ptr_Function_v4float:]] = OpTypePointer Function %[[#v4float]] -; CHECK: %[[#_ptr_Function_float:]] = OpTypePointer Function %[[#float]] -define void @main() { +define void @main(float %f, <4 x float> %f4) { entry: -; CHECK: %[[#f:]] = OpVariable %[[#_ptr_Function_float]] Function -; CHECK: %[[#logf:]] = OpVariable %[[#_ptr_Function_float]] Function -; CHECK: %[[#f4:]] = OpVariable %[[#_ptr_Function_v4float]] Function -; CHECK: %[[#logf4:]] = OpVariable %[[#_ptr_Function_v4float]] Function - %f = alloca float, align 4 +; CHECK-DAG: %[[#f:]] = OpFunctionParameter %[[#float]] +; CHECK-DAG: %[[#f4:]] = OpFunctionParameter %[[#v4float]] %logf = alloca float, align 4 - %f4 = alloca <4 x float>, align 16 %logf4 = alloca <4 x float>, align 16 -; CHECK: %[[#load:]] = OpLoad %[[#float]] %[[#f]] Aligned 4 -; CHECK: %[[#log2:]] = OpExtInst %[[#float]] %[[#extinst]] Log2 %[[#load]] + +; CHECK: %[[#log2:]] = OpExtInst %[[#float]] %[[#extinst]] Log2 %[[#f]] ; CHECK: %[[#res:]] = OpFMul %[[#float]] %[[#log2]] %[[#float_0_30103001]] -; CHECK: OpStore %[[#logf]] %[[#res]] Aligned 4 - %0 = load float, ptr %f, align 4 - %elt.log10 = call float @llvm.log10.f32(float %0) - store float %elt.log10, ptr %logf, align 4 + %elt.log10 = call float @llvm.log10.f32(float %f) -; CHECK: %[[#load:]] = OpLoad %[[#v4float]] %[[#f4]] Aligned 16 -; CHECK: %[[#log2:]] = OpExtInst %[[#v4float]] %[[#extinst]] Log2 %[[#load]] +; CHECK: %[[#log2:]] = OpExtInst %[[#v4float]] %[[#extinst]] Log2 %[[#f4]] ; CHECK: %[[#res:]] = OpVectorTimesScalar %[[#v4float]] %[[#log2]] %[[#float_0_30103001]] -; CHECK: OpStore %[[#logf4]] %[[#res]] Aligned 16 - %1 = load <4 x float>, ptr %f4, align 16 - %elt.log101 = call <4 x float> @llvm.log10.v4f32(<4 x float> %1) - store <4 x float> %elt.log101, ptr %logf4, align 16 + %elt.log101 = call <4 x float> @llvm.log10.v4f32(<4 x float> %f4) ret void } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log2.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log2.ll index add7f77897f790..c71ca125c172a7 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log2.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/log2.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll index fa73b9c2a4d3ab..ddf89221be2ae4 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/normalize.ll @@ -1,3 +1,4 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" ; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/pow.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/pow.ll index 3ac98853b92fbc..38c51ca47d86c9 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/pow.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/pow.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/radians.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/radians.ll index 1fe8ab30ed9538..7aad4df76e3185 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/radians.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/radians.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll index 6f91162a378c8a..9c8c14c2a7220a 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rcp.ll @@ -1,4 +1,5 @@ - ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" + ; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64 ; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/reversebits.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/reversebits.ll index a23b15ab075d60..ce8175fdceb202 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/reversebits.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/reversebits.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpMemoryModel Logical GLSL450 diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/round.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/round.ll index 1c7e78261ffefd..0c88c55cbd395f 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/round.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/round.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rsqrt.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rsqrt.ll index 91023a1e401e16..33d3edc080fd78 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rsqrt.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/rsqrt.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sin.ll index a6ae70a48e5db4..7474b759945138 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sin.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sin.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sinh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sinh.ll index 3b8bdbed0041bb..6a31b702187733 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sinh.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sinh.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smax.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smax.ll index 901e4764e15f67..cbf0b243ab2b3f 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smax.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smax.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smin.ll index c39c39f0455fad..960de853f3afdf 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smin.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/smin.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll index d18b16b843c37b..a05a31c18a754f 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/splitdouble.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; Make sure lowering is correctly generating spirv code. diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sqrt.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sqrt.ll index bb1f0346047e22..55d8a286a0e7fd 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sqrt.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/sqrt.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll index bb50d8c790f8ad..eac0b858955545 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/step.ll @@ -1,3 +1,4 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" ; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll index b4a6e1574f732b..6e2f0698b7b6d5 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tan.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tanh.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tanh.ll index 94fc3f0ec7abf5..1dfdf83fee31e7 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tanh.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/tanh.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#op_ext_glsl:]] = OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/trunc.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/trunc.ll index 2a308028a9b482..bae614ee59676c 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/trunc.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/trunc.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umax.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umax.ll index 01606a38732772..e2b14b089bc139 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umax.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umax.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umin.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umin.ll index 34185ad7143e32..708b76a93e6613 100644 --- a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umin.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/umin.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: OpExtInstImport "GLSL.std.450" diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll new file mode 100644 index 00000000000000..d5e95c7824144f --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageDynIdx.ll @@ -0,0 +1,41 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[CombindedType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] + %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[CombindedType]] [[ac]] + %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll new file mode 100644 index 00000000000000..68bf3478fa9af0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/CombinedSamplerImageNonUniformIdx.ll @@ -0,0 +1,48 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK: OpCapability ShaderNonUniform +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[CombindedType:%[0-9]+]] = OpTypeSampledImage [[BufferType]] +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[CombindedType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[CombindedType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0:%[0-9]+]] = OpLoad [[CombindedType]] [[ac0]] + %buffer0 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[CombindedType]] [[ac1]] + %buffer1 = call target("spirv.SampledImage", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/HlslBufferLoad.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/HlslBufferLoad.ll similarity index 85% rename from llvm/test/CodeGen/SPIRV/HlslBufferLoad.ll rename to llvm/test/CodeGen/SPIRV/hlsl-resources/HlslBufferLoad.ll index fe960f0d6f2f9a..66d5f0f4b05fe6 100644 --- a/llvm/test/CodeGen/SPIRV/HlslBufferLoad.ll +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/HlslBufferLoad.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv-vulkan-library %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-vulkan-library %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpDecorate [[IntBufferVar:%[0-9]+]] DescriptorSet 16 @@ -18,13 +19,13 @@ ; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} ; CHECK-NEXT: OpLabel define void @RWBufferLoad() #0 { -; CHECK-NEXT: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) ; Make sure we use the same variable with multiple loads. -; CHECK-NEXT: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) @@ -36,7 +37,7 @@ define void @RWBufferLoad() #0 { define void @UseDifferentGlobalVar() #0 { ; Make sure we use a different variable from the first function. They have ; different types. -; CHECK-NEXT: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeFloat]] [[FloatBufferVar]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeFloat]] [[FloatBufferVar]] %buffer0 = call target("spirv.Image", float, 5, 2, 0, 0, 2, 3) @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_3( i32 16, i32 7, i32 1, i32 0, i1 false) @@ -48,7 +49,7 @@ define void @UseDifferentGlobalVar() #0 { define void @ReuseGlobalVarFromFirstFunction() #0 { ; Make sure we use the same variable as the first function. They should be the ; same in case one function calls the other. -; CHECK-NEXT: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[RWBufferTypeInt]] [[IntBufferVar]] %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( i32 16, i32 7, i32 1, i32 0, i1 false) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll new file mode 100644 index 00000000000000..39fdc866af7ff2 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability InputAttachmentArrayDynamicIndexing +; SCHECK-NEXT: OpCapability InputAttachment +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll new file mode 100644 index 00000000000000..b05b7eb885b426 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/InputAttachmentImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability InputAttachmentArrayNonUniformIndexing +; SCHECK-NEXT: OpCapability InputAttachment +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] SubpassData 2 0 0 2 Unknown {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 6, 2, 0, 0, 2, 0) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_6_2_0_0_2_0( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll new file mode 100644 index 00000000000000..0c47eeb606e802 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageDynIdx.ll @@ -0,0 +1,66 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK-DAG: OpDecorate [[OtherVar:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[OtherVar]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK-DAG: [[OtherArraySize:%[0-9]+]] = OpConstant [[int]] 5 +; CHECK-DAG: [[OtherBufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[OtherArraySize]] +; CHECK-DAG: [[OtherArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[OtherBufferArrayType]] +; CHECK-DAG: [[OtherVar]] = OpVariable [[OtherArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @DifferentArraySizesAreDifferentVariables() #0 { +; Make sure we use different variables when the array sizes are different +; same in case one function calls the other. +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[OtherVar]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 5, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll new file mode 100644 index 00000000000000..ec94a8eeac2e42 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SampledImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Sampled1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll new file mode 100644 index 00000000000000..9371a792f84b2a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayDynIdx.ll @@ -0,0 +1,39 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability SampledImageArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler +; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] + %buffer0 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[SamplerType]] [[ac]] + %buffer1 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll new file mode 100644 index 00000000000000..151c4aa6d4365c --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/SamplerArrayNonUniformIdx.ll @@ -0,0 +1,46 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: ShaderNonUniform +; CHECK-NEXT: OpCapability SampledImageArrayNonUniformIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[SamplerType:%[0-9]+]] = OpTypeSampler +; CHECK-DAG: [[SamplerPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[SamplerArrayType:%[0-9]+]] = OpTypeArray [[SamplerType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[SamplerArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[SamplerType]] [[ac0]] + %buffer0 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[SamplerPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[SamplerType]] [[ac1]] + %buffer1 = call target("spirv.Sampler") + @llvm.spv.handle.fromBinding.tspirv.Image( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll new file mode 100644 index 00000000000000..908a81777a04a0 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK-NEXT: OpCapability StorageImageArrayDynamicIndexing +; CHECK-NEXT: OpCapability Image1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll new file mode 100644 index 00000000000000..4a582b31d60f1d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageImageNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; CHECK: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability StorageImageArrayNonUniformIndexing +; CHECK-NEXT: OpCapability Image1D +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] 1D 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 0, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_0_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll new file mode 100644 index 00000000000000..d144dcf505fa18 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability ImageBuffer +; CHECK-NEXT: OpCapability StorageTexelBufferArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @void() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll new file mode 100644 index 00000000000000..2f96eda4518f06 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/StorageTexelBufferNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability ImageBuffer +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability StorageTexelBufferArrayNonUniformIndexingEXT +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 2 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 2, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_2_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll new file mode 100644 index 00000000000000..117363241bd966 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferDynIdx.ll @@ -0,0 +1,40 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability SampledBuffer +; CHECK-NEXT: OpCapability UniformTexelBufferArrayDynamicIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One:%[0-9]+]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero:%[0-9]+]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 false) + +; CHECK: [[ac:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[buffer:%[0-9]+]] = OpLoad [[BufferType]] [[ac]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 false) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll new file mode 100644 index 00000000000000..cec16a8e7c8b47 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-resources/UniformTexelBufferNonUniformIdx.ll @@ -0,0 +1,47 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" +; RUN: llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv1.5-vulkan-library %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Shader +; SCHECK-NEXT: OpCapability SampledBuffer +; CHECK-NEXT: OpCapability ShaderNonUniformEXT +; CHECK-NEXT: OpCapability UniformTexelBufferArrayNonUniformIndexing +; CHECK-NOT: OpCapability + +; CHECK-DAG: OpDecorate [[Var:%[0-9]+]] DescriptorSet 3 +; CHECK-DAG: OpDecorate [[Var]] Binding 4 +; CHECK: OpDecorate [[Zero:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld0:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[One:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ac1:%[0-9]+]] NonUniform +; CHECK: OpDecorate [[ld1:%[0-9]+]] NonUniform + +; CHECK-DAG: [[int:%[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: [[BufferType:%[0-9]+]] = OpTypeImage [[int]] Buffer 2 0 0 1 R32i {{$}} +; CHECK-DAG: [[BufferPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferType]] +; CHECK-DAG: [[ArraySize:%[0-9]+]] = OpConstant [[int]] 3 +; CHECK-DAG: [[One]] = OpConstant [[int]] 1 +; CHECK-DAG: [[Zero]] = OpConstant [[int]] 0 +; CHECK-DAG: [[BufferArrayType:%[0-9]+]] = OpTypeArray [[BufferType]] [[ArraySize]] +; CHECK-DAG: [[ArrayPtrType:%[0-9]+]] = OpTypePointer UniformConstant [[BufferArrayType]] +; CHECK-DAG: [[Var]] = OpVariable [[ArrayPtrType]] UniformConstant + +; CHECK: {{%[0-9]+}} = OpFunction {{%[0-9]+}} DontInline {{%[0-9]+}} +; CHECK-NEXT: OpLabel +define void @main() #0 { +; CHECK: [[ac0]] = OpAccessChain [[BufferPtrType]] [[Var]] [[Zero]] +; CHECK: [[ld0]] = OpLoad [[BufferType]] [[ac0]] + %buffer0 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 0, i1 true) + +; CHECK: [[ac1:%[0-9]+]] = OpAccessChain [[BufferPtrType]] [[Var]] [[One]] +; CHECK: [[ld1]] = OpLoad [[BufferType]] [[ac1]] + %buffer1 = call target("spirv.Image", i32, 5, 2, 0, 0, 1, 24) + @llvm.spv.handle.fromBinding.tspirv.Image_f32_5_2_0_0_1_24( + i32 3, i32 4, i32 3, i32 1, i1 true) + ret void +} + +attributes #0 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } diff --git a/llvm/test/CodeGen/SPIRV/literals.ll b/llvm/test/CodeGen/SPIRV/literals.ll index 4109bb6de56110..86a366976a6e26 100644 --- a/llvm/test/CodeGen/SPIRV/literals.ll +++ b/llvm/test/CodeGen/SPIRV/literals.ll @@ -1,12 +1,10 @@ +; TODO(pull/110270): verifier, fix G_BITCAST error "bitcast must change type" ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} -; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} - ; CHECK: %[[#F32:]] = OpTypeFloat 32 ; CHECK: %[[#F64:]] = OpTypeFloat 64 diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll index 6fa3f4e53cc598..8d14c3a359963f 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types-rev.ll @@ -1,7 +1,4 @@ -; The goal of the test case is to ensure that OpPhi is consistent with respect to operand types. -; -verify-machineinstrs is not available due to mutually exclusive requirements for G_BITCAST and G_PHI. - -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Char:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll index 4fbaae25567300..07824d4ed6cd85 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/phi-valid-operand-types.ll @@ -1,7 +1,4 @@ -; The goal of the test case is to ensure that OpPhi is consistent with respect to operand types. -; -verify-machineinstrs is not available due to mutually exclusive requirements for G_BITCAST and G_PHI. - -; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK: %[[#Char:]] = OpTypeInt 8 0 diff --git a/llvm/test/CodeGen/SPIRV/structurizer/basic-if.ll b/llvm/test/CodeGen/SPIRV/structurizer/basic-if.ll new file mode 100644 index 00000000000000..5585e4a07590a3 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/basic-if.ll @@ -0,0 +1,52 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +target triple = "spirv-unknown-vulkan1.3-compute" + +; Function Attrs: convergent noinline norecurse nounwind optnone +define spir_func noundef i32 @_Z7processv() #0 { + +; CHECK: %[[#entry:]] = OpLabel +; CHECK: OpSelectionMerge %[[#merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#left:]] %[[#right:]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + %1 = alloca i32, align 4 + br i1 true, label %left, label %right + +; CHECK: %[[#left]] = OpLabel +; CHECK: OpBranch %[[#merge]] +left: + store i32 0, ptr %1 + br label %end + +; CHECK: %[[#right]] = OpLabel +; CHECK: OpBranch %[[#merge]] +right: + store i32 0, ptr %1 + br label %end + +; CHECK: %[[#merge]] = OpLabel +; CHECK: OpReturnValue %[[#]] +end: + ret i32 0 +} + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.entry() #1 + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.loop() #1 + + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { convergent norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1, !2} + + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/SPIRV/structurizer/basic-imbalanced-if.ll b/llvm/test/CodeGen/SPIRV/structurizer/basic-imbalanced-if.ll new file mode 100644 index 00000000000000..810b5785e4b1a4 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/basic-imbalanced-if.ll @@ -0,0 +1,47 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" +target triple = "spirv-unknown-vulkan1.3-compute" + +; Function Attrs: convergent noinline norecurse nounwind optnone +define spir_func noundef i32 @_Z7processv() #0 { + +; CHECK: %[[#entry:]] = OpLabel +; CHECK: OpSelectionMerge %[[#merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#left:]] %[[#merge]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + %1 = alloca i32, align 4 + br i1 true, label %left, label %end + +; CHECK: %[[#left]] = OpLabel +; CHECK: OpBranch %[[#merge]] +left: + store i32 0, ptr %1 + br label %end + +; CHECK: %[[#merge]] = OpLabel +; CHECK: OpReturnValue %[[#]] +end: + ret i32 0 +} + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.entry() #1 + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.loop() #1 + + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { convergent norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1, !2} + + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/SPIRV/structurizer/basic-loop.ll b/llvm/test/CodeGen/SPIRV/structurizer/basic-loop.ll new file mode 100644 index 00000000000000..ded9c335c5a25f --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/basic-loop.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" +target triple = "spirv-unknown-vulkan1.3-compute" + +; Function Attrs: convergent noinline norecurse nounwind optnone +define spir_func noundef i32 @_Z7processv() #0 { + +; CHECK: %[[#entry:]] = OpLabel +; CHECK: OpBranch %[[#header:]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + %1 = alloca i32, align 4 + br label %header + +; CHECK: %[[#header]] = OpLabel +; CHECK: OpLoopMerge %[[#merge:]] %[[#continue:]] None +; CHECK: OpBranchConditional %[[#]] %[[#body:]] %[[#merge]] +header: + %2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] + br i1 true, label %body, label %merge + +; CHECK: %[[#body]] = OpLabel +; CHECK: OpBranch %[[#continue]] +body: + store i32 0, ptr %1 + br label %continue + +continue: + br label %header +; CHECK: %[[#continue]] = OpLabel +; CHECK: OpBranch %[[#header]] + +; CHECK: %[[#merge]] = OpLabel +; CHECK: OpReturnValue %[[#]] +merge: + ret i32 0 +} + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.entry() #1 + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.loop() #1 + + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { convergent norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1, !2} + + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} + diff --git a/llvm/test/CodeGen/SPIRV/structurizer/basic-phi.ll b/llvm/test/CodeGen/SPIRV/structurizer/basic-phi.ll new file mode 100644 index 00000000000000..a43d25e9b06d5d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/basic-phi.ll @@ -0,0 +1,58 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" +target triple = "spirv-unknown-vulkan1.3-compute" + +; Function Attrs: convergent noinline norecurse nounwind optnone +define spir_func noundef i32 @_Z7processv() #0 { + +; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#]] 0 +; CHECK-DAG: %[[#int_1:]] = OpConstant %[[#]] 1 + +; CHECK: %[[#entry:]] = OpLabel +; CHECK: %[[#var:]] = OpVariable %[[#]] Function +; CHECK: OpSelectionMerge %[[#merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#left:]] %[[#right:]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + br i1 true, label %left, label %right + +; CHECK: %[[#left]] = OpLabel +; CHECK-NEXT: OpStore %[[#var]] %[[#int_0]] +; CHECK-NEXT: OpBranch %[[#merge]] +left: + br label %end + +; CHECK: %[[#right]] = OpLabel +; CHECK-NEXT: OpStore %[[#var]] %[[#int_1]] +; CHECK-NEXT: OpBranch %[[#merge]] +right: + br label %end + +; CHECK: %[[#merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#var]] +; CHECK: OpReturnValue %[[#tmp]] +end: + %1 = phi i32 [ 0, %left ], [ 1, %right ] + ret i32 %1 +} + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.entry() #1 + +; Function Attrs: convergent nocallback nofree nosync nounwind willreturn memory(none) +declare token @llvm.experimental.convergence.loop() #1 + + +attributes #0 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { convergent norecurse "frame-pointer"="all" "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #3 = { convergent } + +!llvm.module.flags = !{!0, !1, !2} + + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{i32 7, !"frame-pointer", i32 2} diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.cond-op.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.cond-op.ll index 4934b17c8c002e..86033608deb6e2 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.cond-op.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.cond-op.ll @@ -8,16 +8,17 @@ target triple = "spirv-unknown-vulkan1.3-compute" ; CHECK-DAG: OpName %[[#fn:]] "_Z2fnv" ; CHECK-DAG: OpName %[[#fn1:]] "_Z3fn1v" ; CHECK-DAG: OpName %[[#fn2:]] "_Z3fn2v" -; CHECK-DAG: OpName %[[#val:]] "val" -; CHECK-DAG: OpName %[[#a:]] "a" -; CHECK-DAG: OpName %[[#b:]] "b" -; CHECK-DAG: OpName %[[#c:]] "c" + +; CHECK-DAG: OpName %[[#r2m_a:]] ".reg2mem3" +; CHECK-DAG: OpName %[[#r2m_b:]] ".reg2mem1" +; CHECK-DAG: OpName %[[#r2m_c:]] ".reg2mem" ; CHECK-DAG: %[[#int_ty:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#bool_ty:]] = OpTypeBool -; CHECK-DAG: %[[#int_pfty:]] = OpTypePointer Function %[[#int_ty]] -; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#int_ty]] 0 +; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#]] 0 +; CHECK-DAG: %[[#int_1:]] = OpConstant %[[#]] 1 +; CHECK-DAG: %[[#true:]] = OpConstantTrue +; CHECK-DAG: %[[#false:]] = OpConstantFalse declare token @llvm.experimental.convergence.entry() #1 @@ -44,100 +45,86 @@ entry: ; CHECK: %[[#process]] = OpFunction %[[#int_ty]] define spir_func noundef i32 @_Z7processv() #0 { + +; CHECK: %[[#entry:]] = OpLabel +; CHECK-DAG: %[[#r2m_a]] = OpVariable %[[#]] Function +; CHECK: OpSelectionMerge %[[#a_merge:]] +; CHECK: OpBranchConditional %[[#]] %[[#a_true:]] %[[#a_false:]] entry: - ; CHECK: %[[#entry:]] = OpLabel %0 = call token @llvm.experimental.convergence.entry() - %a = alloca i32, align 4 - %b = alloca i32, align 4 - %c = alloca i32, align 4 - %val = alloca i32, align 4 - store i32 0, ptr %a, align 4 - store i32 1, ptr %b, align 4 - store i32 2, ptr %c, align 4 - store i32 0, ptr %val, align 4 - ; CHECK-DAG: %[[#a]] = OpVariable %[[#int_pfty]] Function - ; CHECK-DAG: %[[#b]] = OpVariable %[[#int_pfty]] Function - ; CHECK-DAG: %[[#c]] = OpVariable %[[#int_pfty]] Function - ; CHECK-DAG: %[[#val]] = OpVariable %[[#int_pfty]] Function - %1 = load i32, ptr %a, align 4 - %tobool = icmp ne i32 %1, 0 - br i1 %tobool, label %cond.true, label %cond.false - ; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#a]] - ; CHECK: %[[#cond:]] = OpINotEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] - ; CHECK: OpSelectionMerge %[[#cond_end:]] - ; CHECK: OpBranchConditional %[[#cond]] %[[#cond_true:]] %[[#cond_false:]] - -cond.true: ; preds = %entry - %2 = load i32, ptr %b, align 4 - br label %cond.end - ; CHECK: %[[#cond_true]] = OpLabel - ; CHECK: OpBranch %[[#cond_end]] - -cond.false: ; preds = %entry - %3 = load i32, ptr %c, align 4 - br label %cond.end - ; CHECK: %[[#cond_false]] = OpLabel - ; CHECK: %[[#load_c:]] = OpLoad %[[#]] %[[#c]] - ; CHECK: OpBranch %[[#cond_end]] - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] - %tobool1 = icmp ne i32 %cond, 0 - br i1 %tobool1, label %if.then, label %if.end - ; CHECK: %[[#cond_end]] = OpLabel - ; CHECK: %[[#tmp:]] = OpPhi %[[#int_ty]] %[[#load_b:]] %[[#cond_true]] %[[#load_c]] %[[#cond_false]] - ; CHECK: OpSelectionMerge %[[#if_end:]] - ; CHECK: OpBranchConditional %[[#]] %[[#if_then:]] %[[#if_end]] - -if.then: ; preds = %cond.end - %4 = load i32, ptr %val, align 4 - %inc = add nsw i32 %4, 1 - store i32 %inc, ptr %val, align 4 - br label %if.end - ; CHECK: %[[#if_then]] = OpLabel - ; CHECK: OpBranch %[[#if_end]] - -if.end: ; preds = %if.then, %cond.end - %call2 = call spir_func noundef i32 @_Z2fnv() #4 [ "convergencectrl"(token %0) ] - %tobool3 = icmp ne i32 %call2, 0 - br i1 %tobool3, label %cond.true4, label %cond.false6 - ; CHECK: %[[#if_end]] = OpLabel - ; CHECK: OpSelectionMerge %[[#cond_end8:]] - ; CHECK: OpBranchConditional %[[#]] %[[#cond_true4:]] %[[#cond_false6:]] - -cond.true4: ; preds = %if.end - %call5 = call spir_func noundef i32 @_Z3fn1v() #4 [ "convergencectrl"(token %0) ] - br label %cond.end8 - ; CHECK: %[[#cond_true4]] = OpLabel - ; CHECK: OpBranch %[[#cond_end8]] - -cond.false6: ; preds = %if.end - %call7 = call spir_func noundef i32 @_Z3fn2v() #4 [ "convergencectrl"(token %0) ] - br label %cond.end8 - ; CHECK: %[[#cond_false6]] = OpLabel - ; CHECK: OpBranch %[[#cond_end8]] - -cond.end8: ; preds = %cond.false6, %cond.true4 - %cond9 = phi i32 [ %call5, %cond.true4 ], [ %call7, %cond.false6 ] - %tobool10 = icmp ne i32 %cond9, 0 - br i1 %tobool10, label %if.then11, label %if.end13 - ; CHECK: %[[#cond_end8]] = OpLabel - ; CHECK: OpSelectionMerge %[[#if_end13:]] - ; CHECK: OpBranchConditional %[[#]] %[[#if_then11:]] %[[#if_end13]] - -if.then11: ; preds = %cond.end8 - %5 = load i32, ptr %val, align 4 - %inc12 = add nsw i32 %5, 1 - store i32 %inc12, ptr %val, align 4 - br label %if.end13 - ; CHECK: %[[#if_then11]] = OpLabel - ; CHECK: OpBranch %[[#if_end13]] - -if.end13: ; preds = %if.then11, %cond.end8 - %6 = load i32, ptr %val, align 4 - ret i32 %6 - ; CHECK: %[[#if_end13]] = OpLabel - ; CHECK: OpReturnValue + %var = alloca i32 + br i1 true, label %a_true, label %a_false + +; CHECK: %[[#a_true]] = OpLabel +; CHECK: OpStore %[[#r2m_a]] %[[#true]] +; CHECK: OpBranch %[[#a_merge]] +a_true: + br label %a_merge + +; CHECK: %[[#a_false]] = OpLabel +; CHECK: OpStore %[[#r2m_a]] %[[#false]] +; CHECK: OpBranch %[[#a_merge]] +a_false: + br label %a_merge + +; CHECK: %[[#a_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#r2m_a]] +; CHECK: OpSelectionMerge %[[#b_merge:]] +; CHECK: OpBranchConditional %[[#]] %[[#b_true:]] %[[#b_merge]] +a_merge: + %1 = phi i1 [ true, %a_true ], [ false, %a_false ] + br i1 %1, label %b_true, label %b_merge + +; CHECK: %[[#b_true]] = OpLabel +; CHECK: OpBranch %[[#b_merge]] +b_true: + store i32 0, ptr %var ; Prevents whole branch optimization. + br label %b_merge + +; CHECK: %[[#b_merge]] = OpLabel +; CHECK: OpFunctionCall +; CHECK: OpSelectionMerge %[[#c_merge:]] +; CHECK: OpBranchConditional %[[#]] %[[#c_true:]] %[[#c_false:]] +b_merge: + %f1 = call spir_func noundef i32 @_Z2fnv() #4 [ "convergencectrl"(token %0) ] + br i1 true, label %c_true, label %c_false + +; CHECK: %[[#c_true]] = OpLabel +; CHECK: %[[#]] = OpFunctionCall +; CHECK: OpStore %[[#r2m_b]] %[[#]] +; CHECK: OpBranch %[[#c_merge]] +c_true: + %f2 = call spir_func noundef i32 @_Z3fn1v() #4 [ "convergencectrl"(token %0) ] + br label %c_merge + +; CHECK: %[[#c_false]] = OpLabel +; CHECK: %[[#]] = OpFunctionCall +; CHECK: OpStore %[[#r2m_b]] %[[#]] +; CHECK: OpBranch %[[#c_merge]] +c_false: + %f3 = call spir_func noundef i32 @_Z3fn2v() #4 [ "convergencectrl"(token %0) ] + br label %c_merge + +; CHECK: %[[#c_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#r2m_b]] +; CHECK: OpStore %[[#r2m_c]] %[[#tmp:]] +; CHECK: OpSelectionMerge %[[#d_merge:]] +; CHECK: OpBranchConditional %[[#]] %[[#d_true:]] %[[#d_merge]] +c_merge: + %5 = phi i32 [ %f2, %c_true ], [ %f3, %c_false ] + br i1 true, label %d_true, label %d_merge + +; CHECK: %[[#d_true]] = OpLabel +; CHECK: OpBranch %[[#d_merge]] +d_true: + store i32 0, ptr %var ; Prevents whole branch optimization. + br label %d_merge + +; CHECK: %[[#d_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#r2m_c]] +; CHECK: OpReturnValue %[[#tmp]] +d_merge: + ret i32 %5 } ; Function Attrs: convergent noinline norecurse nounwind optnone diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.break.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.break.ll index 3fc440dc445e14..8e05bf1ebdaa7c 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.break.ll @@ -1,5 +1,4 @@ ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=2 ; int foo() { return true; } ; diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.continue.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.continue.ll index 051f0685a40426..36b61745fa55af 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.continue.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.continue.ll @@ -1,4 +1,3 @@ -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=10 ; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-as --preserve-numeric-ids - -o - | spirv-val %} ; ; Source HLSL: diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.nested.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.nested.ll index a28e1c7b942de0..d8e17c2291a7c6 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.do.nested.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.do.nested.ll @@ -1,5 +1,4 @@ ; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-as --preserve-numeric-ids - -o - | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=142 ; ; Source HLSL: ; diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.break.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.break.ll index f2e60f916c795b..9d8cab44c0cbb5 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.break.ll @@ -1,5 +1,4 @@ ; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-as --preserve-numeric-ids - -o - | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=4 ; ; Source HLSL: ; diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.continue.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.continue.ll index 31a3433cae4c07..12e6473c15c757 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.continue.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.continue.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=19 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.nested.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.nested.ll index 1619a519273b37..ddd4c5222301a3 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.nested.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.nested.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=2563170 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.short-circuited-cond.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.short-circuited-cond.ll index 1b5e868317fba5..07c20ebadd159e 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.for.short-circuited-cond.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.for.short-circuited-cond.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=9 ; ; int process() { @@ -139,10 +138,10 @@ ; CHECK: OpBranchConditional %[[#]] %[[#bb130:]] %[[#bb125:]] ; CHECK: %[[#bb130:]] = OpLabel ; CHECK: OpBranch %[[#bb126:]] -; CHECK: %[[#bb125:]] = OpLabel -; CHECK: OpReturnValue %[[#]] ; CHECK: %[[#bb126:]] = OpLabel ; CHECK: OpBranch %[[#bb124:]] +; CHECK: %[[#bb125:]] = OpLabel +; CHECK: OpReturnValue %[[#]] ; CHECK: OpFunctionEnd ; CHECK: %[[#func_83:]] = OpFunction %[[#void:]] DontInline %[[#]] ; CHECK: %[[#bb131:]] = OpLabel diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.const-cond.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.const-cond.ll index f3a9109b06ee2e..df406917fdff97 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.const-cond.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.const-cond.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=3 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.for.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.for.ll index 42c885070453ac..93effc141fc812 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.for.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.for.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=6 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.nested.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.nested.ll index 1fea1ebd888f5c..a69475a59db6f4 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.nested.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.nested.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=3 ; diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.plain.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.plain.ll index c3b0caa4e26947..8fa8c2c14878bf 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.if.plain.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.if.plain.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=2 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-and.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-and.ll index a5f00071ca2712..9d35fb3c82b077 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-and.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-and.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=1 ; ; int fn() { return true; } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-or.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-or.ll index 73db1c897711fa..0a986661e50d5b 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-or.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.logical-or.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=3 ; ; int fn() { return true; } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.return.early.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.return.early.ll index 62d18cdf538c37..dfaca85be2280c 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.return.early.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.return.early.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=0 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.ll index d2447fe4562435..8e2a0506d286d6 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=308 ; ; int foo() { return 200; } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple.ll index 74c5a2edf7c2fb..125e3f751315eb 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=5 ; ; int process() { diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple2.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple2.ll index bfe3b45779afb6..cf50b982b23dc8 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple2.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.switch.ifstmt.simple2.ll @@ -1,6 +1,5 @@ ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | spirv-sim --function=_Z7processv --wave=1 --expects=5 ; ; int foo() { return 200; } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/cf.while.break.ll b/llvm/test/CodeGen/SPIRV/structurizer/cf.while.break.ll index 8f3981a2449682..769be32c9fc41e 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/cf.while.break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/cf.while.break.ll @@ -41,47 +41,45 @@ ; } ; CHECK: %[[#func_16:]] = OpFunction %[[#uint:]] DontInline %[[#]] -; CHECK: %[[#bb37:]] = OpLabel +; CHECK: %[[#bb44:]] = OpLabel ; CHECK: OpReturnValue %[[#]] ; CHECK: OpFunctionEnd -; CHECK: %[[#func_17:]] = OpFunction %[[#void:]] DontInline %[[#]] -; CHECK: %[[#bb38:]] = OpLabel -; CHECK: OpBranch %[[#bb39:]] -; CHECK: %[[#bb39:]] = OpLabel -; CHECK: OpLoopMerge %[[#bb40:]] %[[#bb41:]] None -; CHECK: OpBranchConditional %[[#]] %[[#bb42:]] %[[#bb40:]] -; CHECK: %[[#bb42:]] = OpLabel -; CHECK: OpBranchConditional %[[#]] %[[#bb40:]] %[[#bb43:]] -; CHECK: %[[#bb43:]] = OpLabel -; CHECK: OpBranchConditional %[[#]] %[[#bb40:]] %[[#bb41:]] -; CHECK: %[[#bb40:]] = OpLabel -; CHECK: OpSelectionMerge %[[#bb44:]] None -; CHECK: OpSwitch %[[#]] %[[#bb44:]] 1 %[[#bb44:]] 2 %[[#bb44:]] -; CHECK: %[[#bb41:]] = OpLabel -; CHECK: OpBranch %[[#bb39:]] -; CHECK: %[[#bb44:]] = OpLabel -; CHECK: OpBranch %[[#bb45:]] +; CHECK: %[[#func_19:]] = OpFunction %[[#void:]] DontInline %[[#]] ; CHECK: %[[#bb45:]] = OpLabel ; CHECK: OpBranch %[[#bb46:]] ; CHECK: %[[#bb46:]] = OpLabel -; CHECK: OpBranch %[[#bb47:]] -; CHECK: %[[#bb47:]] = OpLabel -; CHECK: OpSelectionMerge %[[#bb48:]] None -; CHECK: OpBranchConditional %[[#]] %[[#bb49:]] %[[#bb48:]] +; CHECK: OpLoopMerge %[[#bb47:]] %[[#bb48:]] None +; CHECK: OpBranchConditional %[[#]] %[[#bb49:]] %[[#bb47:]] ; CHECK: %[[#bb49:]] = OpLabel -; CHECK: OpBranch %[[#bb48:]] -; CHECK: %[[#bb48:]] = OpLabel -; CHECK: OpBranch %[[#bb50:]] +; CHECK: OpBranchConditional %[[#]] %[[#bb47:]] %[[#bb50:]] ; CHECK: %[[#bb50:]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#bb47:]] %[[#bb48:]] +; CHECK: %[[#bb48:]] = OpLabel +; CHECK: OpBranch %[[#bb46:]] +; CHECK: %[[#bb47:]] = OpLabel +; CHECK: OpSelectionMerge %[[#bb51:]] None +; CHECK: OpSwitch %[[#]] %[[#bb51:]] 1 %[[#bb51:]] 2 %[[#bb51:]] +; CHECK: %[[#bb51:]] = OpLabel +; CHECK: OpBranch %[[#bb52:]] +; CHECK: %[[#bb52:]] = OpLabel +; CHECK: OpBranch %[[#bb53:]] +; CHECK: %[[#bb53:]] = OpLabel +; CHECK: OpBranch %[[#bb54:]] +; CHECK: %[[#bb54:]] = OpLabel +; CHECK: OpSelectionMerge %[[#bb55:]] None +; CHECK: OpBranchConditional %[[#]] %[[#bb56:]] %[[#bb55:]] +; CHECK: %[[#bb56:]] = OpLabel +; CHECK: OpBranch %[[#bb55:]] +; CHECK: %[[#bb55:]] = OpLabel +; CHECK: OpBranch %[[#bb57:]] +; CHECK: %[[#bb57:]] = OpLabel ; CHECK: OpReturn ; CHECK: OpFunctionEnd -; CHECK: %[[#func_35:]] = OpFunction %[[#void:]] None %[[#]] -; CHECK: %[[#bb51:]] = OpLabel +; CHECK: %[[#func_40:]] = OpFunction %[[#void:]] None %[[#]] +; CHECK: %[[#bb58:]] = OpLabel ; CHECK: OpReturn ; CHECK: OpFunctionEnd - - target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan1.3-compute" diff --git a/llvm/test/CodeGen/SPIRV/structurizer/condition-linear.ll b/llvm/test/CodeGen/SPIRV/structurizer/condition-linear.ll index faab2553ae6f51..71f3ce9263da56 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/condition-linear.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/condition-linear.ll @@ -1,5 +1,5 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan-compute" @@ -25,90 +25,92 @@ entry: ret i32 1 } + +; CHECK-DAG: OpName %[[#reg_0:]] "cond.reg2mem" +; CHECK-DAG: OpName %[[#reg_1:]] "cond9.reg2mem" + define internal spir_func void @main() #0 { -; CHECK: %[[#cond:]] = OpINotEqual %[[#bool_ty:]] %[[#a:]] %[[#b:]] -; CHECK: OpSelectionMerge %[[#cond_end:]] None -; CHECK: OpBranchConditional %[[#cond]] %[[#cond_true:]] %[[#cond_false:]] +; CHECK: OpSelectionMerge %[[#cond1_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#cond1_true:]] %[[#cond1_false:]] entry: %0 = call token @llvm.experimental.convergence.entry() %a = alloca i32, align 4 %b = alloca i32, align 4 - %c = alloca i32, align 4 - %val = alloca i32, align 4 - store i32 0, ptr %val, align 4 - %1 = load i32, ptr %a, align 4 - %tobool = icmp ne i32 %1, 0 - br i1 %tobool, label %cond.true, label %cond.false - -; CHECK: %[[#cond_true]] = OpLabel -; CHECK: OpBranch %[[#cond_end]] -cond.true: - %2 = load i32, ptr %b, align 4 - br label %cond.end - -; CHECK: %[[#cond_false]] = OpLabel -; CHECK: OpBranch %[[#cond_end]] -cond.false: - %3 = load i32, ptr %c, align 4 - br label %cond.end - -; CHECK: %[[#cond_end]] = OpLabel -; CHECK: %[[#tmp:]] = OpPhi %[[#int_ty:]] %[[#load_cond_true:]] %[[#cond_true]] %[[#load_cond_false:]] %[[#cond_false:]] -; CHECK: %[[#cond:]] = OpINotEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0:]] -; CHECK: OpSelectionMerge %[[#if_end:]] None -; CHECK: OpBranchConditional %[[#cond]] %[[#if_then:]] %[[#if_end]] -cond.end: - %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ] + br i1 true, label %cond1_true, label %cond1_false + +; CHECK: %[[#cond1_true]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] +; CHECK: OpBranch %[[#cond1_merge]] +cond1_true: + %2 = load i32, ptr %a, align 4 + br label %cond1_merge + +; CHECK: %[[#cond1_false]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] +; CHECK: OpBranch %[[#cond1_merge]] +cond1_false: + %3 = load i32, ptr %b, align 4 + br label %cond1_merge + +; CHECK: %[[#cond1_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#reg_0]] +; CHECK: %[[#cond:]] = OpINotEqual %[[#]] %[[#tmp]] %[[#]] +; CHECK: OpSelectionMerge %[[#cond2_merge:]] None +; CHECK: OpBranchConditional %[[#cond]] %[[#cond2_true:]] %[[#cond2_merge]] +cond1_merge: + %cond = phi i32 [ %2, %cond1_true ], [ %3, %cond1_false ] %tobool1 = icmp ne i32 %cond, 0 - br i1 %tobool1, label %if.then, label %if.end - -; CHECK: %[[#if_then]] = OpLabel -; CHECK: OpBranch %[[#if_end]] -if.then: - %4 = load i32, ptr %val, align 4 - %inc = add nsw i32 %4, 1 - store i32 %inc, ptr %val, align 4 - br label %if.end - -; CHECK: %[[#if_end]] = OpLabel -; CHECK: OpSelectionMerge %[[#cond_end8:]] None -; CHECK: OpBranchConditional %[[#tmp:]] %[[#cond4_true:]] %[[#cond_false6:]] -if.end: + br i1 %tobool1, label %cond2_true, label %cond2_merge + +; CHECK: %[[#cond2_true]] = OpLabel +; CHECK: OpBranch %[[#cond2_merge]] +cond2_true: + store i32 0, ptr %a + br label %cond2_merge + +; CHECK: %[[#cond2_merge]] = OpLabel +; CHECK: OpFunctionCall +; CHECK: OpSelectionMerge %[[#cond3_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#cond3_true:]] %[[#cond3_false:]] +cond2_merge: %call2 = call spir_func noundef i32 @fn() #4 [ "convergencectrl"(token %0) ] - %tobool3 = icmp ne i32 %call2, 0 - br i1 %tobool3, label %cond.true4, label %cond.false6 + br i1 true, label %cond3_true, label %cond3_false -; CHECK: %[[#cond4_true]] = OpLabel -; CHECK: OpBranch %[[#cond_end8]] -cond.true4: +; CHECK: %[[#cond3_true]] = OpLabel +; CHECK: OpFunctionCall +; CHECK: OpStore %[[#reg_1]] %[[#]] +; CHECK: OpBranch %[[#cond3_merge]] +cond3_true: %call5 = call spir_func noundef i32 @fn1() #4 [ "convergencectrl"(token %0) ] - br label %cond.end8 + br label %cond3_merge -; CHECK: %[[#cond_false6]] = OpLabel -; CHECK: OpBranch %[[#cond_end8]] -cond.false6: +; CHECK: %[[#cond3_false]] = OpLabel +; CHECK: OpFunctionCall +; CHECK: OpStore %[[#reg_1]] %[[#]] +; CHECK: OpBranch %[[#cond3_merge]] +cond3_false: %call7 = call spir_func noundef i32 @fn2() #4 [ "convergencectrl"(token %0) ] - br label %cond.end8 - -; CHECK: %[[#cond_end8]] = OpLabel -; CHECK: OpSelectionMerge %[[#if_end13:]] None -; CHECK: OpBranchConditional %[[#tmp:]] %[[#if_then11:]] %[[#if_end13]] -cond.end8: - %cond9 = phi i32 [ %call5, %cond.true4 ], [ %call7, %cond.false6 ] + br label %cond3_merge + +; CHECK: %[[#cond3_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#reg_1]] +; CHECK: %[[#cond:]] = OpINotEqual %[[#]] %[[#tmp]] %[[#]] +; CHECK: OpSelectionMerge %[[#cond4_merge:]] None +; CHECK: OpBranchConditional %[[#cond]] %[[#cond4_true:]] %[[#cond4_merge]] +cond3_merge: + %cond9 = phi i32 [ %call5, %cond3_true ], [ %call7, %cond3_false ] %tobool10 = icmp ne i32 %cond9, 0 - br i1 %tobool10, label %if.then11, label %if.end13 + br i1 %tobool10, label %cond4_true, label %cond4_merge -; CHECK: %[[#if_then11]] = OpLabel -; CHECK: OpBranch %[[#if_end13]] -if.then11: - %5 = load i32, ptr %val, align 4 - %inc12 = add nsw i32 %5, 1 - store i32 %inc12, ptr %val, align 4 - br label %if.end13 +; CHECK: %[[#cond4_true]] = OpLabel +; CHECK: OpBranch %[[#cond4_merge]] +cond4_true: + store i32 0, ptr %a + br label %cond4_merge -; CHECK: %[[#if_end13]] = OpLabel +; CHECK: %[[#cond4_merge]] = OpLabel ; CHECK: OpReturn -if.end13: +cond4_merge: ret void } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/do-continue.ll b/llvm/test/CodeGen/SPIRV/structurizer/do-continue.ll index d547ad8eded976..05071d03164d5f 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/do-continue.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/do-continue.ll @@ -1,5 +1,5 @@ -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan1.3-compute" @@ -12,100 +12,98 @@ entry: define internal spir_func void @main() #2 { ; CHECK: %[[#entry:]] = OpLabel -; CHECK: OpBranch %[[#do_body:]] +; CHECK: OpBranch %[[#do_header:]] entry: %0 = call token @llvm.experimental.convergence.entry() - %val = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, ptr %val, align 4 - store i32 0, ptr %i, align 4 - br label %do.body + %var = alloca i32, align 4 + br label %do_header +; Here a the loop header had to be split in two: +; - 1 header for the loop +; - 1 header for the condition. +; In SPIR-V, a loop header cannot directly ; CHECK: %[[#do_header:]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end:]] %[[#do_cond:]] None -; CHECK: OpBranch %[[#do_body:]] +; CHECK: OpLoopMerge %[[#do_merge:]] %[[#do_latch:]] None +; CHECK: OpBranch %[[#new_header:]] -; CHECK: %[[#do_body]] = OpLabel -; CHECK: OpSelectionMerge %[[#if_then:]] None -; CHECK: OpBranchConditional %[[#cond:]] %[[#if_then]] %[[#if_end:]] -do.body: +; CHECK: %[[#new_header]] = OpLabel +; CHECK: OpSelectionMerge %[[#if_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#if_then:]] %[[#if_end:]] +do_header: %1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - %2 = load i32, ptr %i, align 4 - %inc = add nsw i32 %2, 1 - store i32 %inc, ptr %i, align 4 - %3 = load i32, ptr %i, align 4 - %cmp = icmp sgt i32 %3, 5 - br i1 %cmp, label %if.then, label %if.end + store i32 0, ptr %var + br i1 true, label %if.then, label %if.end + +; CHECK: %[[#if_then]] = OpLabel +; CHECK: OpBranch %[[#if_merge]] +if.then: + store i32 0, ptr %var + br label %do_latch ; CHECK: %[[#if_end]] = OpLabel -; CHECK: OpBranch %[[#if_then]] +; CHECK: OpBranch %[[#if_merge]] if.end: - %4 = load i32, ptr %i, align 4 - store i32 %4, ptr %val, align 4 - br label %do.cond + store i32 0, ptr %var + br label %do_latch -; CHECK: %[[#if_then]] = OpLabel -; CHECK: OpBranch %[[#do_cond]] -if.then: - br label %do.cond +; CHECK: %[[#if_merge]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do_latch]] %[[#do_merge]] -; CHECK: %[[#do_cond]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_header]] %[[#do_end]] -do.cond: - %5 = load i32, ptr %i, align 4 - %cmp1 = icmp slt i32 %5, 10 - br i1 %cmp1, label %do.body, label %do.end +; CHECK: %[[#do_latch]] = OpLabel +; CHECK: OpBranch %[[#do_header]] +do_latch: + store i32 0, ptr %var + br i1 true, label %do_header, label %do.end -; CHECK: %[[#do_end]] = OpLabel -; CHECK: OpBranch %[[#do_body2:]] +; CHECK: %[[#do_merge]] = OpLabel +; CHECK: OpBranch %[[#do2_header:]] do.end: - br label %do.body2 + store i32 0, ptr %var + br label %do2_header -; CHECK: %[[#do_body2]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end11:]] %[[#do_cond9:]] None -; CHECK: OpBranch %[[#do_body4:]] -do.body2: +; CHECK: %[[#do2_header]] = OpLabel +; CHECK: OpLoopMerge %[[#do2_merge:]] %[[#do2_continue:]] None +; CHECK: OpBranch %[[#do3_header:]] +do2_header: %6 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - %7 = load i32, ptr %i, align 4 - %inc3 = add nsw i32 %7, 1 - store i32 %inc3, ptr %i, align 4 - br label %do.body4 - -; CHECK: %[[#do_body4]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end8:]] %[[#do_cond6:]] None -; CHECK: OpBranch %[[#do_cond6]] -do.body4: + store i32 0, ptr %var + br label %do3_header + +; CHECK: %[[#do3_header]] = OpLabel +; CHECK: OpLoopMerge %[[#do3_merge:]] %[[#do3_continue:]] None +; CHECK: OpBranch %[[#do3_body:]] +do3_header: %8 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %6) ] - %9 = load i32, ptr %val, align 4 - %inc5 = add nsw i32 %9, 1 - store i32 %inc5, ptr %val, align 4 - br label %do.cond6 - -; CHECK: %[[#do_cond6]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_body4]] %[[#do_end8]] -do.cond6: - %10 = load i32, ptr %i, align 4 - %cmp7 = icmp slt i32 %10, 10 - br i1 %cmp7, label %do.body4, label %do.end8 - -; CHECK: %[[#do_end8]] = OpLabel -; CHECK: OpBranch %[[#do_cond9]] -do.end8: - %11 = load i32, ptr %i, align 4 - %dec = add nsw i32 %11, -1 - store i32 %dec, ptr %i, align 4 - br label %do.cond9 - -; CHECK: %[[#do_cond9]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_body2]] %[[#do_end11]] -do.cond9: - %12 = load i32, ptr %val, align 4 - %cmp10 = icmp slt i32 %12, 10 - br i1 %cmp10, label %do.body2, label %do.end11 - -; CHECK: %[[#do_end11]] = OpLabel -; CHECK: OpReturn -do.end11: + store i32 0, ptr %var + br label %do3_continue + +; CHECK: %[[#do3_body]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do3_continue]] %[[#do3_merge]] + +; CHECK: %[[#do3_continue]] = OpLabel +; CHECK: OpBranch %[[#do3_header]] +do3_continue: + store i32 0, ptr %var + br i1 true, label %do3_header, label %do3_merge + +; CHECK: %[[#do3_merge]] = OpLabel +; CHECK: OpBranch %[[#do2_new_latch:]] +do3_merge: + store i32 0, ptr %var + br label %do2_continue + +; CHECK: %[[#do2_new_latch]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do2_continue]] %[[#do2_merge]] + +; CHECK: %[[#do2_continue]] = OpLabel +; CHECK: OpBranch %[[#do2_header]] +do2_continue: + store i32 0, ptr %var + br i1 true, label %do2_header, label %do2_merge + +; CHECK: %[[#do2_merge]] = OpLabel +; CHECK: OpReturn +do2_merge: ret void } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/do-nested.ll b/llvm/test/CodeGen/SPIRV/structurizer/do-nested.ll index a16eed5cdfb4ce..bef95f5f63bf7b 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/do-nested.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/do-nested.ll @@ -1,5 +1,5 @@ -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan1.3-compute" @@ -8,84 +8,72 @@ define internal spir_func void @main() #0 { ; CHECK: %[[#entry:]] = OpLabel entry: %0 = call token @llvm.experimental.convergence.entry() - %val = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - store i32 0, ptr %val, align 4 - store i32 0, ptr %i, align 4 - store i32 0, ptr %j, align 4 - store i32 0, ptr %k, align 4 - br label %do.body + %var = alloca i32, align 4 + br label %do1_header -; CHECK: %[[#do_1_header:]] = OpLabel -; CHECK: OpLoopMerge %[[#end:]] %[[#do_1_latch:]] None -; CHECK: OpBranch %[[#do_2_header:]] -do.body: +; CHECK: %[[#do1_header:]] = OpLabel +; CHECK: OpLoopMerge %[[#do1_merge:]] %[[#do1_continue:]] None +; CHECK: OpBranch %[[#do2_header:]] +do1_header: %1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - %2 = load i32, ptr %val, align 4 - %3 = load i32, ptr %i, align 4 - %add = add nsw i32 %2, %3 - store i32 %add, ptr %val, align 4 - br label %do.body1 + store i32 0, ptr %var + br label %do2_header -; CHECK: %[[#do_2_header]] = OpLabel -; CHECK: OpLoopMerge %[[#do_2_end:]] %[[#do_2_latch:]] None -; CHECK: OpBranch %[[#do_2_body:]] -do.body1: +; CHECK: %[[#do2_header:]] = OpLabel +; CHECK: OpLoopMerge %[[#do2_merge:]] %[[#do2_continue:]] None +; CHECK: OpBranch %[[#do3_header:]] +do2_header: %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %1) ] - br label %do.body2 + store i32 0, ptr %var + br label %do3_header -; CHECK: %[[#do_2_body]] = OpLabel -; CHECK: OpLoopMerge %[[#do_3_end:]] %[[#do_3_header:]] None -; CHECK: OpBranch %[[#do_3_header]] -do.body2: +; CHECK: %[[#do3_header:]] = OpLabel +; CHECK: OpLoopMerge %[[#do3_merge:]] %[[#do3_continue:]] None +; CHECK: OpBranch %[[#do3_cond:]] +do3_header: %5 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %4) ] - %6 = load i32, ptr %k, align 4 - %inc = add nsw i32 %6, 1 - store i32 %inc, ptr %k, align 4 - br label %do.cond + store i32 0, ptr %var + br label %do3_continue -; CHECK: %[[#do_3_header]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_2_body]] %[[#do_3_end]] -do.cond: - %7 = load i32, ptr %k, align 4 - %cmp = icmp slt i32 %7, 30 - br i1 %cmp, label %do.body2, label %do.end +; CHECK: %[[#do3_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do3_continue]] %[[#do3_merge]] +; CHECK: %[[#do3_continue]] = OpLabel +; CHECK: OpBranch %[[#do3_header]] +do3_continue: + store i32 0, ptr %var + br i1 true, label %do3_header, label %do3_merge -; CHECK: %[[#do_3_end]] = OpLabel -; CHECK: OpBranch %[[#do_2_latch]] -do.end: - %8 = load i32, ptr %j, align 4 - %inc3 = add nsw i32 %8, 1 - store i32 %inc3, ptr %j, align 4 - br label %do.cond4 +; CHECK: %[[#do3_merge]] = OpLabel +; CHECK: OpBranch %[[#do2_cond:]] +do3_merge: + store i32 0, ptr %var + br label %do2_continue -; CHECK: %[[#do_2_latch]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_2_header]] %[[#do_2_end]] -do.cond4: - %9 = load i32, ptr %j, align 4 - %cmp5 = icmp slt i32 %9, 20 - br i1 %cmp5, label %do.body1, label %do.end6 +; CHECK: %[[#do2_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do2_continue]] %[[#do2_merge]] +; CHECK: %[[#do2_continue]] = OpLabel +; CHECK: OpBranch %[[#do2_header]] +do2_continue: + store i32 0, ptr %var + br i1 true, label %do2_header, label %do2_merge -; CHECK: %[[#do_2_end]] = OpLabel -; CHECK: OpBranch %[[#do_1_latch]] -do.end6: - %10 = load i32, ptr %i, align 4 - %inc7 = add nsw i32 %10, 1 - store i32 %inc7, ptr %i, align 4 - br label %do.cond8 +; CHECK: %[[#do2_merge]] = OpLabel +; CHECK: OpBranch %[[#do1_cond:]] +do2_merge: + store i32 0, ptr %var + br label %do1_continue -; CHECK: %[[#do_1_latch]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_1_header]] %[[#end]] -do.cond8: - %11 = load i32, ptr %i, align 4 - %cmp9 = icmp slt i32 %11, 10 - br i1 %cmp9, label %do.body, label %do.end10 +; CHECK: %[[#do1_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do1_continue]] %[[#do1_merge]] +; CHECK: %[[#do1_continue]] = OpLabel +; CHECK: OpBranch %[[#do1_header]] +do1_continue: + store i32 0, ptr %var + br i1 true, label %do1_header, label %do1_merge -; CHECK: %[[#end]] = OpLabel -; CHECK: OpReturn -do.end10: +; CHECK: %[[#do1_merge]] = OpLabel +; CHECK: OpReturn +do1_merge: ret void } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/do-plain.ll b/llvm/test/CodeGen/SPIRV/structurizer/do-plain.ll index 6d4a0e591cf512..9f84fc317021f0 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/do-plain.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/do-plain.ll @@ -15,71 +15,75 @@ entry: define internal spir_func void @main() #2 { ; CHECK: %[[#entry:]] = OpLabel -; CHECK: OpBranch %[[#do_body:]] +; CHECK: OpBranch %[[#do1_header:]] entry: %0 = call token @llvm.experimental.convergence.entry() - %val = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, ptr %val, align 4 - store i32 0, ptr %i, align 4 - br label %do.body - -; CHECK: %[[#do_body]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end:]] %[[#do_cond:]] None -; CHECK: OpBranch %[[#do_cond]] -do.body: + %var = alloca i32, align 4 + br label %do1_header + +; CHECK: %[[#do1_header]] = OpLabel +; CHECK: OpLoopMerge %[[#do1_merge:]] %[[#do1_continue:]] None +; CHECK: OpBranch %[[#do1_cond:]] +do1_header: %1 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - %2 = load i32, ptr %i, align 4 - store i32 %2, ptr %val, align 4 - br label %do.cond - -; CHECK: %[[#do_cond]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_body]] %[[#do_end]] -do.cond: - %3 = load i32, ptr %i, align 4 - %cmp = icmp slt i32 %3, 10 - br i1 %cmp, label %do.body, label %do.end - -; CHECK: %[[#do_end]] = OpLabel -; CHECK: OpBranch %[[#do_body1:]] -do.end: - br label %do.body1 - -; CHECK: %[[#do_body1]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end3:]] %[[#do_cond2:]] None -; CHECK: OpBranch %[[#do_cond2]] -do.body1: + store i32 0, ptr %var + br label %do1_continue + +; CHECK: %[[#do1_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do1_continue]] %[[#do1_merge]] +; CHECK: %[[#do1_continue]] = OpLabel +; CHECK: OpBranch %[[#do1_header]] +do1_continue: + store i32 0, ptr %var + br i1 true, label %do1_header, label %do1_merge + +; CHECK: %[[#do1_merge]] = OpLabel +; CHECK: OpBranch %[[#do2_header:]] +do1_merge: + store i32 0, ptr %var + br label %do2_header + +; CHECK: %[[#do2_header]] = OpLabel +; CHECK: OpLoopMerge %[[#do2_merge:]] %[[#do2_continue:]] None +; CHECK: OpBranch %[[#do2_cond:]] +do2_header: %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - store i32 0, ptr %val, align 4 - br label %do.cond2 - -; CHECK: %[[#do_cond2]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_body1]] %[[#do_end3]] -do.cond2: - br i1 true, label %do.body1, label %do.end3 - -; CHECK: %[[#do_end3]] = OpLabel -; CHECK: OpBranch %[[#do_body4:]] -do.end3: - br label %do.body4 - -; CHECK: %[[#do_body4]] = OpLabel -; CHECK: OpLoopMerge %[[#do_end7:]] %[[#do_cond5:]] None -; CHECK: OpBranch %[[#do_cond5]] -do.body4: + store i32 0, ptr %var + br label %do2_continue + +; CHECK: %[[#do2_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do2_continue]] %[[#do2_merge]] +; CHECK: %[[#do2_continue]] = OpLabel +; CHECK: OpBranch %[[#do2_header]] +do2_continue: + store i32 0, ptr %var + br i1 true, label %do2_header, label %do2_merge + +; CHECK: %[[#do2_merge]] = OpLabel +; CHECK: OpBranch %[[#do3_header:]] +do2_merge: + store i32 0, ptr %var + br label %do3_header + +; CHECK: %[[#do3_header]] = OpLabel +; CHECK: OpLoopMerge %[[#do3_merge:]] %[[#do3_continue:]] None +; CHECK: OpBranch %[[#do3_cond:]] +do3_header: %5 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] - br label %do.cond5 - -; CHECK: %[[#do_cond5]] = OpLabel -; CHECK: OpBranchConditional %[[#cond:]] %[[#do_body4]] %[[#do_end7]] -do.cond5: - %6 = load i32, ptr %val, align 4 - %cmp6 = icmp slt i32 %6, 20 - br i1 %cmp6, label %do.body4, label %do.end7 - -; CHECK: %[[#do_end7]] = OpLabel -; CHECK: OpReturn -do.end7: + store i32 0, ptr %var + br label %do3_continue + +; CHECK: %[[#do3_cond]] = OpLabel +; CHECK: OpBranchConditional %[[#]] %[[#do3_continue]] %[[#do3_merge]] +; CHECK: %[[#do3_continue]] = OpLabel +; CHECK: OpBranch %[[#do3_header]] +do3_continue: + store i32 0, ptr %var + br i1 true, label %do3_header, label %do3_merge + +; CHECK: %[[#do3_merge]] = OpLabel +; CHECK: OpReturn +do3_merge: ret void } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/logical-or.ll b/llvm/test/CodeGen/SPIRV/structurizer/logical-or.ll index 26b12a1e14f0e2..235f15b0644704 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/logical-or.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/logical-or.ll @@ -1,84 +1,77 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - --asm-verbose=0 | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - --asm-verbose=0 | FileCheck %s --match-full-lines target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan1.3-compute" -; CHECK-DAG: OpName %[[#fn:]] "fn" -; CHECK-DAG: OpName %[[#main:]] "main" -; CHECK-DAG: OpName %[[#var_a:]] "a" -; CHECK-DAG: OpName %[[#var_b:]] "b" +define internal spir_func void @main() #3 { +; CHECK-DAG: OpName %[[#switch_0:]] "reg1" +; CHECK-DAG: OpName %[[#switch_1:]] "reg" -; CHECK-DAG: %[[#bool:]] = OpTypeBool -; CHECK-DAG: %[[#true:]] = OpConstantTrue %[[#bool]] +; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#]] 0 +; CHECK-DAG: %[[#int_1:]] = OpConstant %[[#]] 1 -; CHECK: %[[#fn]] = OpFunction %[[#param:]] DontInline %[[#ftype:]] -define spir_func noundef i32 @fn() #0 { -entry: - %0 = call token @llvm.experimental.convergence.entry() - ret i32 1 -} +; CHECK: %[[#entry:]] = OpLabel +; CHECK-DAG: %[[#switch_0]] = OpVariable %[[#]] Function +; CHECK-DAG: %[[#switch_1]] = OpVariable %[[#]] Function +; CHECK: OpSelectionMerge %[[#merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#new_header:]] %[[#unreachable:]] -; CHECK: %[[#main]] = OpFunction %[[#param:]] DontInline %[[#ftype:]] +; CHECK: %[[#new_header]] = OpLabel +; CHECK: OpSelectionMerge %[[#new_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#taint_true_merge:]] %[[#br_false:]] -define internal spir_func void @main() #3 { +; CHECK: %[[#unreachable]] = OpLabel +; CHECK-NEXT: OpUnreachable -; CHECK: %[[#entry:]] = OpLabel -; CHECK-DAG: %[[#var_a]] = OpVariable %[[#type:]] Function -; CHECK-DAG: %[[#var_b]] = OpVariable %[[#type:]] Function -; CHECK: %[[#tmp:]] = OpLoad %[[#type:]] %[[#var_a]] Aligned 4 -; CHECK: %[[#cond:]] = OpINotEqual %[[#bool]] %[[#tmp]] %[[#const:]] -; CHECK: OpSelectionMerge %[[#if_end:]] None -; CHECK: OpBranchConditional %[[#true]] %[[#cond1:]] %[[#dead:]] +; CHECK: %[[#taint_true_merge]] = OpLabel +; CHECK: OpStore %[[#switch_0]] %[[#int_1]] +; CHECK: OpBranch %[[#new_merge]] -; CHECK: %[[#cond1]] = OpLabel -; CHECK: OpSelectionMerge %[[#new_exit:]] None -; CHECK: OpBranchConditional %[[#cond]] %[[#new_exit]] %[[#lor_lhs_false:]] +; CHECK: %[[#br_false]] = OpLabel +; CHECK-DAG: OpStore %[[#switch_1]] %[[#int_0]] +; CHECK: OpSelectionMerge %[[#taint_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#taint_merge]] %[[#taint_false:]] -; CHECK: %[[#dead]] = OpLabel -; CHECK-NEXT: OpUnreachable +; CHECK: %[[#taint_false]] = OpLabel +; CHECK: OpStore %[[#switch_1]] %[[#int_1]] +; CHECK: OpBranch %[[#taint_merge]] -; CHECK: %[[#lor_lhs_false]] = OpLabel -; CHECK: %[[#tmp:]] = OpLoad %[[#type:]] %[[#var_b]] Aligned 4 -; CHECK: %[[#cond:]] = OpINotEqual %[[#bool]] %[[#tmp]] %[[#value:]] -; CHECK: OpBranchConditional %[[#cond]] %[[#new_exit]] %[[#alias_exit:]] +; CHECK: %[[#taint_merge]] = OpLabel +; CHECK: OpStore %[[#switch_0]] %[[#int_0]] +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#switch_1]] +; CHECK: %[[#cond:]] = OpIEqual %[[#]] %[[#int_0]] %[[#tmp]] +; CHECK: OpBranchConditional %[[#cond]] %[[#taint_false_true:]] %[[#new_merge]] -; CHECK: %[[#alias_exit]] = OpLabel -; CHECK: OpBranch %[[#new_exit]] +; CHECK: %[[#taint_false_true]] = OpLabel +; CHECK: OpStore %[[#switch_0]] %[[#int_1]] +; CHECK: OpBranch %[[#new_merge]] -; CHECK: %[[#new_exit]] = OpLabel -; CHECK: %[[#tmp:]] = OpPhi %[[#type:]] %[[#A:]] %[[#cond1]] %[[#A:]] %[[#lor_lhs_false]] %[[#B:]] %[[#alias_exit]] -; CHECK: %[[#cond:]] = OpIEqual %[[#bool]] %[[#A]] %[[#tmp]] -; CHECK: OpBranchConditional %[[#cond]] %[[#if_then:]] %[[#if_end]] +; CHECK: %[[#new_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#switch_0]] +; CHECK: %[[#cond:]] = OpIEqual %[[#]] %[[#int_0]] %[[#tmp]] +; CHECK: OpBranchConditional %[[#cond]] %[[#merge]] %[[#br_true:]] -; CHECK: %[[#if_then]] = OpLabel -; CHECK: OpBranch %[[#if_end]] +; CHECK: %[[#br_true]] = OpLabel +; CHECK: OpBranch %[[#merge]] -; CHECK: %[[#if_end]] = OpLabel -; CHECK: OpReturn +; CHECK: %[[#merge]] = OpLabel +; CHECK: OpReturn entry: %0 = call token @llvm.experimental.convergence.entry() - %a = alloca i32, align 4 - %b = alloca i32, align 4 - %val = alloca i32, align 4 - store i32 0, ptr %val, align 4 - %1 = load i32, ptr %a, align 4 - %tobool = icmp ne i32 %1, 0 - br i1 %tobool, label %if.then, label %lor.lhs.false - -lor.lhs.false: - %2 = load i32, ptr %b, align 4 - %tobool1 = icmp ne i32 %2, 0 - br i1 %tobool1, label %if.then, label %if.end - -if.then: - %8 = load i32, ptr %val, align 4 - %inc = add nsw i32 %8, 1 - store i32 %inc, ptr %val, align 4 - br label %if.end - -if.end: + %var = alloca i32, align 4 + br i1 true, label %br_true, label %br_false + +br_false: + store i32 0, ptr %var, align 4 + br i1 true, label %br_true, label %merge + +br_true: + store i32 0, ptr %var, align 4 + br label %merge + +merge: ret void } diff --git a/llvm/test/CodeGen/SPIRV/structurizer/loop-continue-split.ll b/llvm/test/CodeGen/SPIRV/structurizer/loop-continue-split.ll new file mode 100644 index 00000000000000..5a5ea002e0942f --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/loop-continue-split.ll @@ -0,0 +1,104 @@ +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s + +; The goal of this test is to voluntarily create 2 overlapping convergence +; structures: the loop, and the inner condition. +; Here, the condition header also branches to 2 internal nodes, which are not +; directly a merge/exits. +; This will require a proper header-split. +; In addition, splitting the header makes the continue the merge of the inner +; condition, so we need to properly split the continue block to create a +; valid inner merge, in the correct order. + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" +target triple = "spirv-unknown-vulkan1.3-compute" + +; CHECK-DAG: OpName %[[#switch_0:]] "reg1" +; CHECK-DAG: OpName %[[#variable:]] "var" + +; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#]] 0 +; CHECK-DAG: %[[#int_1:]] = OpConstant %[[#]] 1 +; CHECK-DAG: %[[#int_2:]] = OpConstant %[[#]] 2 +; CHECK-DAG: %[[#int_3:]] = OpConstant %[[#]] 3 +; CHECK-DAG: %[[#int_4:]] = OpConstant %[[#]] 4 + +define internal spir_func void @main() #1 { +; CHECK: %[[#entry:]] = OpLabel +; CHECK: %[[#switch_0]] = OpVariable %[[#]] Function +; CHECK: %[[#variable]] = OpVariable %[[#]] Function +; CHECK: OpBranch %[[#header:]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + %var = alloca i32, align 4 + br label %header + +; CHECK: %[[#header]] = OpLabel +; CHECK: OpLoopMerge %[[#merge:]] %[[#continue:]] None +; CHECK: OpBranch %[[#split_header:]] + +; CHECK: %[[#split_header]] = OpLabel +; CHECK: OpSelectionMerge %[[#inner_merge:]] None +; CHECK: OpBranchConditional %[[#]] %[[#left:]] %[[#right:]] +header: + %2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ] + br i1 true, label %left, label %right + +; CHECK: %[[#left]] = OpLabel +; CHECK-DAG: OpStore %[[#switch_0]] %[[#int_0]] +; CHECK-DAG: OpStore %[[#variable]] %[[#int_1]] +; CHECK: OpBranchConditional %[[#]] %[[#inner_merge]] %[[#left_next:]] +left: + store i32 1, ptr %var + br i1 true, label %merge, label %left_next + +; CHECK: %[[#right]] = OpLabel +; CHECK-DAG: OpStore %[[#switch_0]] %[[#int_0]] +; CHECK-DAG: OpStore %[[#variable]] %[[#int_2]] +; CHECK: OpBranchConditional %[[#]] %[[#inner_merge]] %[[#right_next:]] +right: + store i32 2, ptr %var + br i1 true, label %merge, label %right_next + +; CHECK: %[[#left_next]] = OpLabel +; CHECK-DAG: OpStore %[[#switch_0]] %[[#int_1]] +; CHECK-DAG: OpStore %[[#variable]] %[[#int_3]] +; CHECK: OpBranch %[[#inner_merge]] +left_next: + store i32 3, ptr %var + br label %continue + +; CHECK: %[[#right_next]] = OpLabel +; CHECK-DAG: OpStore %[[#switch_0]] %[[#int_1]] +; CHECK-DAG: OpStore %[[#variable]] %[[#int_4]] +; CHECK: OpBranch %[[#inner_merge]] +right_next: + store i32 4, ptr %var + br label %continue + +; CHECK: %[[#inner_merge]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#switch_0]] +; CHECK: %[[#cond:]] = OpIEqual %[[#]] %[[#int_0]] %[[#tmp]] +; CHECK: OpBranchConditional %[[#cond]] %[[#merge]] %[[#continue]] + +; CHECK: %[[#continue]] = OpLabel +; CHECK: OpBranch %[[#header]] +continue: + br label %header + +; CHECK: %[[#merge]] = OpLabel +; CHECK: OpReturn +merge: + ret void +} + + +declare token @llvm.experimental.convergence.entry() #0 +declare token @llvm.experimental.convergence.loop() #0 + +attributes #0 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #1 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll index a9a0397718e1f5..b421ae7990c67a 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-break.ll @@ -1,23 +1,23 @@ -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan-compute" define internal spir_func void @main() #0 { -; CHECK: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId -; CHECK-DAG: %[[#int_ty:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#int_fpty:]] = OpTypePointer Function %[[#int_ty]] +; CHECK-DAG: OpName %[[#idx:]] "idx" +; CHECK-DAG: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId +; CHECK-DAG: %[[#int_ty:]] = OpTypeInt 32 0 ; CHECK-DAG: %[[#int_ipty:]] = OpTypePointer Input %[[#int_ty]] -; CHECK-DAG: %[[#bool_ty:]] = OpTypeBool -; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#int_ty]] 0 -; CHECK-DAG: %[[#int_10:]] = OpConstant %[[#int_ty]] 10 -; CHECK-DAG: %[[#builtin]] = OpVariable %[[#int_ipty]] Input +; CHECK-DAG: %[[#bool_ty:]] = OpTypeBool +; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#int_ty]] 0 +; CHECK-DAG: %[[#int_10:]] = OpConstant %[[#int_ty]] 10 +; CHECK-DAG: %[[#builtin]] = OpVariable %[[#int_ipty]] Input ; CHECK: %[[#entry:]] = OpLabel -; CHECK: %[[#idx:]] = OpVariable %[[#int_fpty]] Function -; CHECK: OpStore %[[#idx]] %[[#int_0]] Aligned 4 +; CHECK: %[[#idx]] = OpVariable %[[#]] Function +; ACHECK: OpStore %[[#idx]] %[[#int_0]] Aligned 4 ; CHECK: OpBranch %[[#while_cond:]] entry: %0 = call token @llvm.experimental.convergence.entry() @@ -37,12 +37,12 @@ while.cond: %cmp = icmp ne i32 %2, 10 br i1 %cmp, label %while.body, label %while.end -; CHECK: %[[#while_body]] = OpLabel -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 -; CHECK-NEXT: OpStore %[[#idx]] %[[#tmp]] Aligned 4 -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 -; CHECK-NEXT: %[[#cmp1:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] -; CHECK: OpBranchConditional %[[#cmp1]] %[[#new_end]] %[[#if_end]] +; CHECK: %[[#while_body]] = OpLabel +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#builtin]] Aligned 1 +; CHECK: OpStore %[[#idx]] %[[#tmp]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 +; CHECK: %[[#cmp1:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] +; CHECK: OpBranchConditional %[[#cmp1]] %[[#new_end]] %[[#if_end]] while.body: %3 = call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %1) ] store i32 %3, ptr %idx, align 4 @@ -50,14 +50,14 @@ while.body: %cmp1 = icmp eq i32 %4, 0 br i1 %cmp1, label %if.then, label %if.end +; CHECK: %[[#if_end]] = OpLabel +; CHECK: OpBranch %[[#while_cond]] + ; CHECK: %[[#new_end]] = OpLabel ; CHECK: OpBranch %[[#while_end:]] - if.then: br label %while.end -; CHECK: %[[#if_end]] = OpLabel -; CHECK: OpBranch %[[#while_cond]] if.end: br label %while.cond diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll index 3db7545b81780c..ac330a96444b82 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-convergence-in-break.ll @@ -1,21 +1,20 @@ -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan-compute" define internal spir_func void @main() #0 { -; CHECK: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId - +; CHECK-DAG: OpName %[[#idx:]] "idx" +; CHECK-DAG: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId ; CHECK-DAG: %[[#int_ty:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#pint_ty:]] = OpTypePointer Function %[[#int_ty]] ; CHECK-DAG: %[[#bool_ty:]] = OpTypeBool ; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#int_ty]] 0 ; CHECK-DAG: %[[#int_10:]] = OpConstant %[[#int_ty]] 10 ; CHECK: %[[#entry:]] = OpLabel -; CHECK: %[[#idx:]] = OpVariable %[[#pint_ty]] Function +; CHECK: %[[#idx]] = OpVariable %[[#]] Function ; CHECK: OpStore %[[#idx]] %[[#int_0]] Aligned 4 ; CHECK: OpBranch %[[#while_cond:]] entry: @@ -48,17 +47,18 @@ while.body: %cmp1 = icmp eq i32 %4, 0 br i1 %cmp1, label %if.then, label %if.end +; CHECK: %[[#if_end]] = OpLabel +; CHECK: OpBranch %[[#while_cond]] + ; CHECK: %[[#if_then]] = OpLabel -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 -; CHECK-NEXT: OpStore %[[#idx]] %[[#tmp]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 +; CHECK: OpStore %[[#idx]] %[[#tmp]] Aligned 4 ; CHECK: OpBranch %[[#new_end]] if.then: %5 = call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %1) ] store i32 %5, ptr %idx, align 4 br label %while.end -; CHECK: %[[#if_end]] = OpLabel -; CHECK: OpBranch %[[#while_cond]] if.end: br label %while.cond diff --git a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll index d25b30df45ae04..784bd38a6fbaed 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/merge-exit-multiple-break.ll @@ -1,14 +1,15 @@ -; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} ; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan-compute" define internal spir_func void @main() #0 { -; CHECK: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId +; CHECK-DAG: OpName %[[#idx:]] "idx" +; CHECK-DAG: OpName %[[#reg_0:]] "reg" +; CHECK-DAG: OpDecorate %[[#builtin:]] BuiltIn SubgroupLocalInvocationId ; CHECK-DAG: %[[#int_ty:]] = OpTypeInt 32 0 -; CHECK-DAG: %[[#pint_ty:]] = OpTypePointer Function %[[#int_ty]] ; CHECK-DAG: %[[#bool_ty:]] = OpTypeBool ; CHECK-DAG: %[[#int_0:]] = OpConstant %[[#int_ty]] 0 ; CHECK-DAG: %[[#int_1:]] = OpConstant %[[#int_ty]] 1 @@ -16,7 +17,7 @@ define internal spir_func void @main() #0 { ; CHECK-DAG: %[[#int_10:]] = OpConstant %[[#int_ty]] 10 ; CHECK: %[[#entry:]] = OpLabel -; CHECK: %[[#idx:]] = OpVariable %[[#pint_ty]] Function +; CHECK: %[[#idx]] = OpVariable %[[#]] Function ; CHECK: OpStore %[[#idx]] %[[#int_0]] Aligned 4 ; CHECK: OpBranch %[[#while_cond:]] entry: @@ -26,6 +27,7 @@ entry: br label %while.cond ; CHECK: %[[#while_cond]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] Aligned 4 ; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 ; CHECK: %[[#cmp:]] = OpINotEqual %[[#bool_ty]] %[[#tmp]] %[[#int_10]] ; CHECK: OpLoopMerge %[[#new_end:]] %[[#if_end2:]] None @@ -37,10 +39,11 @@ while.cond: br i1 %cmp, label %while.body, label %while.end ; CHECK: %[[#while_body]] = OpLabel -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 -; CHECK-NEXT: OpStore %[[#idx]] %[[#tmp]] Aligned 4 -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 -; CHECK-NEXT: %[[#cmp1:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] +; CHECK: OpStore %[[#reg_0]] %[[#]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#]] %[[#builtin]] Aligned 1 +; CHECK: OpStore %[[#idx]] %[[#tmp]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 +; CHECK: %[[#cmp1:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] ; CHECK: OpBranchConditional %[[#cmp1]] %[[#new_end]] %[[#if_end:]] while.body: %3 = call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %1) ] @@ -50,10 +53,11 @@ while.body: br i1 %cmp1, label %if.then, label %if.end ; CHECK: %[[#if_end]] = OpLabel -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 -; CHECK-NEXT: OpStore %[[#idx]] %[[#tmp]] Aligned 4 -; CHECK-NEXT: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 -; CHECK-NEXT: %[[#cmp2:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] +; CHECK: OpStore %[[#reg_0]] %[[#]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#builtin]] Aligned 1 +; CHECK: OpStore %[[#idx]] %[[#tmp]] Aligned 4 +; CHECK: %[[#tmp:]] = OpLoad %[[#int_ty]] %[[#idx]] Aligned 4 +; CHECK: %[[#cmp2:]] = OpIEqual %[[#bool_ty]] %[[#tmp]] %[[#int_0]] ; CHECK: OpBranchConditional %[[#cmp2]] %[[#new_end]] %[[#if_end2]] if.end: %5 = call i32 @__hlsl_wave_get_lane_index() [ "convergencectrl"(token %1) ] @@ -62,14 +66,15 @@ if.end: %cmp2 = icmp eq i32 %6, 0 br i1 %cmp2, label %if.then2, label %if.end2 +; CHECK: %[[#if_end2]] = OpLabel +; CHECK: OpBranch %[[#while_cond]] + ; TODO: this OpSwitch is useless. Improve the "remove useless branches" step of the structurizer to ; cleanup those. ; CHECK: %[[#new_end]] = OpLabel -; CHECK: %[[#route:]] = OpPhi %[[#int_ty]] %[[#int_0]] %[[#while_cond]] %[[#int_1]] %[[#while_body]] %[[#int_2]] %[[#if_end]] +; CHECK: %[[#route:]] = OpLoad %[[#]] %[[#reg_0]] Aligned 4 ; CHECK: OpSwitch %[[#route]] %[[#while_end:]] 1 %[[#while_end:]] 2 %[[#while_end:]] -; CHECK: %[[#if_end2]] = OpLabel -; CHECK: OpBranch %[[#while_cond]] if.end2: br label %while.cond diff --git a/llvm/test/CodeGen/SPIRV/structurizer/phi-exit.ll b/llvm/test/CodeGen/SPIRV/structurizer/phi-exit.ll new file mode 100644 index 00000000000000..541b23a6495c9a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/structurizer/phi-exit.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" +target triple = "spirv-unknown-vulkan1.3-compute" + +define internal spir_func void @main() #1 { +; CHECK: %[[#entry:]] = OpLabel +; CHECK: OpBranch %[[#do_body:]] +entry: + %0 = call token @llvm.experimental.convergence.entry() + %a = alloca i32, align 4 + br label %loop_body + +loop_body: + br i1 true, label %left, label %right + +left: + br i1 true, label %loop_exit, label %loop_continue + +right: + br i1 true, label %loop_exit, label %loop_continue + +loop_continue: + br label %loop_body + +loop_exit: + %r = phi i32 [ 0, %left ], [ 1, %right ] + store i32 %r, ptr %a, align 4 + ret void + +} + + +declare token @llvm.experimental.convergence.entry() #0 +declare token @llvm.experimental.convergence.loop() #0 + +attributes #0 = { convergent nocallback nofree nosync nounwind willreturn memory(none) } +attributes #1 = { convergent noinline norecurse nounwind optnone "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} + diff --git a/llvm/test/CodeGen/SPIRV/structurizer/return-early.ll b/llvm/test/CodeGen/SPIRV/structurizer/return-early.ll index 6f60538153dfc9..e27aca8784286c 100644 --- a/llvm/test/CodeGen/SPIRV/structurizer/return-early.ll +++ b/llvm/test/CodeGen/SPIRV/structurizer/return-early.ll @@ -1,6 +1,9 @@ -; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s --match-full-lines +; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 %s -o - | FileCheck %s ; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %} +; CHECK-DAG: OpName %[[#reg_0:]] "reg2" +; CHECK-DAG: OpName %[[#reg_1:]] "reg1" + target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-G1" target triple = "spirv-unknown-vulkan1.3-compute" @@ -37,36 +40,40 @@ while.body: ] ; CHECK: %[[#case_1]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] ; CHECK: OpBranch %[[#switch_end]] sw.bb: store i32 1, ptr %a, align 4 br label %while.end ; CHECK: %[[#case_2]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] ; CHECK: OpBranch %[[#switch_end]] sw.bb1: store i32 3, ptr %a, align 4 br label %while.end ; CHECK: %[[#case_5]] = OpLabel +; CHECK: OpStore %[[#reg_0]] %[[#]] ; CHECK: OpBranch %[[#switch_end]] sw.bb2: store i32 5, ptr %a, align 4 br label %while.end ; CHECK: %[[#switch_end]] = OpLabel -; CHECK: %[[#phi:]] = OpPhi %[[#type:]] %[[#A:]] %[[#while_body]] %[[#B:]] %[[#case_5]] %[[#B:]] %[[#case_2]] %[[#B:]] %[[#case_1]] -; CHECK: %[[#tmp:]] = OpIEqual %[[#type:]] %[[#A]] %[[#phi]] +; CHECK: %[[#val:]] = OpLoad %[[#]] %[[#reg_0]] +; CHECK: %[[#tmp:]] = OpIEqual %[[#type:]] %[[#]] %[[#val]] ; CHECK: OpBranchConditional %[[#tmp]] %[[#sw_default:]] %[[#while_end]] ; CHECK: %[[#sw_default]] = OpLabel -; CHECK: OpStore %[[#A:]] %[[#B:]] Aligned 4 +; CHECK: OpStore %[[#]] %[[#B:]] Aligned 4 ; CHECK: OpBranch %[[#for_cond:]] sw.default: store i32 0, ptr %i, align 4 br label %for.cond ; CHECK: %[[#for_cond]] = OpLabel +; CHECK: OpStore %[[#reg_1]] %[[#]] ; CHECK: OpSelectionMerge %[[#for_merge:]] None ; CHECK-NEXT: OpBranchConditional %[[#cond:]] %[[#for_merge]] %[[#for_end:]] for.cond: @@ -76,13 +83,14 @@ for.cond: br i1 %cmp, label %for.body, label %for.end ; CHECK: %[[#for_end]] = OpLabel +; CHECK: OpStore %[[#reg_1]] %[[#]] ; CHECK: OpBranch %[[#for_merge]] for.end: br label %while.end ; CHECK: %[[#for_merge]] = OpLabel -; CHECK: %[[#phi:]] = OpPhi %[[#type:]] %[[#A:]] %[[#for_cond]] %[[#B:]] %[[#for_end]] -; CHECK: %[[#tmp:]] = OpIEqual %[[#type:]] %[[#A]] %[[#phi]] +; CHECK: %[[#val:]] = OpLoad %[[#]] %[[#reg_1]] +; CHECK: %[[#tmp:]] = OpIEqual %[[#type:]] %[[#]] %[[#val]] ; CHECK: OpBranchConditional %[[#tmp]] %[[#for_body:]] %[[#while_end]] ; CHECK: %[[#for_body]] = OpLabel diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll index 1df29f0b12d1b6..fc573fbd4fc99d 100644 --- a/llvm/test/CodeGen/X86/andnot-patterns.ll +++ b/llvm/test/CodeGen/X86/andnot-patterns.ll @@ -14,41 +14,73 @@ declare void @use_i32(i32) ; define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { -; X86-LABEL: andnot_rotl_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: notl %esi -; X86-NEXT: notl %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: jne .LBB0_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %edx, %eax -; X86-NEXT: jmp .LBB0_3 -; X86-NEXT: .LBB0_1: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edx, %esi -; X86-NEXT: .LBB0_3: -; X86-NEXT: movl %esi, %edx -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shldl %cl, %esi, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotl_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: jne .LBB0_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: jmp .LBB0_3 +; X86-NOBMI-NEXT: .LBB0_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: .LBB0_3: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: shldl %cl, %edx, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shldl %cl, %esi, %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotl_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rax -; X64-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NEXT: rolq %cl, %rax -; X64-NEXT: andq %rdi, %rax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotl_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: testb $32, %cl +; X86-BMI-NEXT: jne .LBB0_1 +; X86-BMI-NEXT: # %bb.2: +; X86-BMI-NEXT: movl %eax, %esi +; X86-BMI-NEXT: jmp .LBB0_3 +; X86-BMI-NEXT: .LBB0_1: +; X86-BMI-NEXT: movl %edx, %esi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: .LBB0_3: +; X86-BMI-NEXT: movl %edx, %eax +; X86-BMI-NEXT: shldl %cl, %esi, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI-NEXT: shldl %cl, %edx, %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotl_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: rolq %cl, %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdx, %rcx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI-NEXT: rolq %cl, %rsi +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %rot = tail call i64 @llvm.fshl.i64(i64 %not, i64 %not, i64 %a2) %and = and i64 %rot, %a0 @@ -56,24 +88,40 @@ define i64 @andnot_rotl_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { } define i32 @andnot_rotl_i32(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotl_i32: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: roll %cl, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotl_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: roll %cl, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotl_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: roll %cl, %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotl_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: roll %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotl_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: roll %cl, %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: roll %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshl.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 @@ -84,23 +132,32 @@ define i16 @andnot_rotl_i16(i16 %a0, i16 %a1, i16 %a2) nounwind { ; X86-LABEL: andnot_rotl_i16: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rolw %cl, %ax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; -; X64-LABEL: andnot_rotl_i16: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rolw %cl, %ax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_rotl_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rolw %cl, %ax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotl_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rolw %cl, %si +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %rot = tail call i16 @llvm.fshl.i16(i16 %not, i16 %not, i16 %a2) %and = and i16 %rot, %a0 @@ -112,8 +169,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notb %al ; X86-NEXT: rolb %cl, %al +; X86-NEXT: notb %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; @@ -121,9 +178,9 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax -; X64-NEXT: notb %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rolb %cl, %al +; X64-NEXT: notb %al ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -198,41 +255,73 @@ define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind { ; define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { -; X86-LABEL: andnot_rotr_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: notl %esi -; X86-NEXT: notl %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: je .LBB5_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %edx, %eax -; X86-NEXT: jmp .LBB5_3 -; X86-NEXT: .LBB5_1: -; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl %edx, %esi -; X86-NEXT: .LBB5_3: -; X86-NEXT: movl %esi, %edx -; X86-NEXT: shrdl %cl, %eax, %edx -; X86-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NEXT: shrdl %cl, %esi, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i64: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: testb $32, %cl +; X86-NOBMI-NEXT: je .LBB5_1 +; X86-NOBMI-NEXT: # %bb.2: +; X86-NOBMI-NEXT: movl %eax, %edx +; X86-NOBMI-NEXT: jmp .LBB5_3 +; X86-NOBMI-NEXT: .LBB5_1: +; X86-NOBMI-NEXT: movl %esi, %edx +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: .LBB5_3: +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: shrdl %cl, %edx, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NOBMI-NEXT: shrdl %cl, %esi, %edx +; X86-NOBMI-NEXT: notl %edx +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i64: -; X64: # %bb.0: -; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: notq %rax -; X64-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-NEXT: rorq %cl, %rax -; X64-NEXT: andq %rdi, %rax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i64: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: testb $32, %cl +; X86-BMI-NEXT: je .LBB5_1 +; X86-BMI-NEXT: # %bb.2: +; X86-BMI-NEXT: movl %eax, %esi +; X86-BMI-NEXT: jmp .LBB5_3 +; X86-BMI-NEXT: .LBB5_1: +; X86-BMI-NEXT: movl %edx, %esi +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: .LBB5_3: +; X86-BMI-NEXT: movl %edx, %eax +; X86-BMI-NEXT: shrdl %cl, %esi, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-BMI-NEXT: shrdl %cl, %edx, %esi +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %esi, %edx +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i64: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rdx, %rcx +; X64-NOBMI-NEXT: movq %rsi, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-NOBMI-NEXT: rorq %cl, %rax +; X64-NOBMI-NEXT: notq %rax +; X64-NOBMI-NEXT: andq %rdi, %rax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i64: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movq %rdx, %rcx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $rcx +; X64-BMI-NEXT: rorq %cl, %rsi +; X64-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %rot = tail call i64 @llvm.fshr.i64(i64 %not, i64 %not, i64 %a2) %and = and i64 %rot, %a0 @@ -240,24 +329,40 @@ define i64 @andnot_rotr_i64(i64 %a0, i64 %a1, i64 %a2) nounwind { } define i32 @andnot_rotr_i32(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotr_i32: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: rorl %cl, %eax -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i32: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: rorl %cl, %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i32: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %eax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i32: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: rorl %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i32: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorl %cl, %eax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i32: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorl %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 @@ -268,23 +373,32 @@ define i16 @andnot_rotr_i16(i16 %a0, i16 %a1, i16 %a2) nounwind { ; X86-LABEL: andnot_rotr_i16: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: rorw %cl, %ax +; X86-NEXT: notl %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; -; X64-LABEL: andnot_rotr_i16: -; X64: # %bb.0: -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: movl %esi, %eax -; X64-NEXT: notl %eax -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorw %cl, %ax -; X64-NEXT: andl %edi, %eax -; X64-NEXT: # kill: def $ax killed $ax killed $eax -; X64-NEXT: retq +; X64-NOBMI-LABEL: andnot_rotr_i16: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: movl %esi, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorw %cl, %ax +; X64-NOBMI-NEXT: notl %eax +; X64-NOBMI-NEXT: andl %edi, %eax +; X64-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i16: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorw %cl, %si +; X64-BMI-NEXT: andnl %edi, %esi, %eax +; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 %rot = tail call i16 @llvm.fshr.i16(i16 %not, i16 %not, i16 %a2) %and = and i16 %rot, %a0 @@ -296,8 +410,8 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notb %al ; X86-NEXT: rorb %cl, %al +; X86-NEXT: notb %al ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: retl ; @@ -305,9 +419,9 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl %edx, %ecx ; X64-NEXT: movl %esi, %eax -; X64-NEXT: notb %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NEXT: rorb %cl, %al +; X64-NEXT: notb %al ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -318,36 +432,67 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind { } define i32 @andnot_rotr_i32_multiuse_not(i32 %a0, i32 %a1, i32 %a2) nounwind { -; X86-LABEL: andnot_rotr_i32_multiuse_not: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: notl %eax -; X86-NEXT: movl %eax, %esi -; X86-NEXT: rorl %cl, %esi -; X86-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-NEXT: pushl %eax -; X86-NEXT: calll use_i32@PLT -; X86-NEXT: addl $4, %esp -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_rotr_i32_multiuse_not: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: pushl %esi +; X86-NOBMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: movl %eax, %esi +; X86-NOBMI-NEXT: rorl %cl, %esi +; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %esi +; X86-NOBMI-NEXT: pushl %eax +; X86-NOBMI-NEXT: calll use_i32@PLT +; X86-NOBMI-NEXT: addl $4, %esp +; X86-NOBMI-NEXT: movl %esi, %eax +; X86-NOBMI-NEXT: popl %esi +; X86-NOBMI-NEXT: retl ; -; X64-LABEL: andnot_rotr_i32_multiuse_not: -; X64: # %bb.0: -; X64-NEXT: pushq %rbx -; X64-NEXT: movl %edx, %ecx -; X64-NEXT: notl %esi -; X64-NEXT: movl %esi, %ebx -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: rorl %cl, %ebx -; X64-NEXT: andl %edi, %ebx -; X64-NEXT: movl %esi, %edi -; X64-NEXT: callq use_i32@PLT -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: popq %rbx -; X64-NEXT: retq +; X86-BMI-LABEL: andnot_rotr_i32_multiuse_not: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi +; X86-BMI-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: notl %edx +; X86-BMI-NEXT: rorl %cl, %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %esi +; X86-BMI-NEXT: pushl %edx +; X86-BMI-NEXT: calll use_i32@PLT +; X86-BMI-NEXT: addl $4, %esp +; X86-BMI-NEXT: movl %esi, %eax +; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: retl +; +; X64-NOBMI-LABEL: andnot_rotr_i32_multiuse_not: +; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: pushq %rbx +; X64-NOBMI-NEXT: movl %edx, %ecx +; X64-NOBMI-NEXT: notl %esi +; X64-NOBMI-NEXT: movl %esi, %ebx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: rorl %cl, %ebx +; X64-NOBMI-NEXT: andl %edi, %ebx +; X64-NOBMI-NEXT: movl %esi, %edi +; X64-NOBMI-NEXT: callq use_i32@PLT +; X64-NOBMI-NEXT: movl %ebx, %eax +; X64-NOBMI-NEXT: popq %rbx +; X64-NOBMI-NEXT: retq +; +; X64-BMI-LABEL: andnot_rotr_i32_multiuse_not: +; X64-BMI: # %bb.0: +; X64-BMI-NEXT: pushq %rbx +; X64-BMI-NEXT: movl %edx, %ecx +; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: notl %eax +; X64-BMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-BMI-NEXT: rorl %cl, %esi +; X64-BMI-NEXT: andnl %edi, %esi, %ebx +; X64-BMI-NEXT: movl %eax, %edi +; X64-BMI-NEXT: callq use_i32@PLT +; X64-BMI-NEXT: movl %ebx, %eax +; X64-BMI-NEXT: popq %rbx +; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2) %and = and i32 %rot, %a0 diff --git a/llvm/test/CodeGen/X86/avx10_2-cmp.ll b/llvm/test/CodeGen/X86/avx10_2-cmp.ll new file mode 100644 index 00000000000000..de0bec7ea2695a --- /dev/null +++ b/llvm/test/CodeGen/X86/avx10_2-cmp.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx10.2-256 | FileCheck %s --check-prefix=X86 + +define i1 @hoeq(half %x, half %y) { +; X64-LABEL: hoeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune(half %x, half %y) { +; X64-LABEL: hune: +; X64: # %bb.0: +; X64-NEXT: vucomxsh %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune: +; X86: # %bb.0: +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @hoeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hoeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: hoeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp oeq half %x, %y + ret i1 %1 +} + +define i1 @hune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: hune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X64-NEXT: vucomxsh (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: hune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsh {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero +; X86-NEXT: vucomxsh (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load half, ptr %xp + %y = load half, ptr %yp + %1 = fcmp une half %x, %y + ret i1 %1 +} + +define i1 @foeq(float %x, float %y) { +; X64-LABEL: foeq: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune(float %x, float %y) { +; X64-LABEL: fune: +; X64: # %bb.0: +; X64-NEXT: vucomxss %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune: +; X86: # %bb.0: +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @foeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: foeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: foeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp oeq float %x, %y + ret i1 %1 +} + +define i1 @fune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: fune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: vucomxss (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: fune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: vucomxss (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load float, ptr %xp + %y = load float, ptr %yp + %1 = fcmp une float %x, %y + ret i1 %1 +} + +define i1 @doeq(double %x, double %y) { +; X64-LABEL: doeq: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune(double %x, double %y) { +; X64-LABEL: dune: +; X64: # %bb.0: +; X64-NEXT: vucomxsd %xmm1, %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune: +; X86: # %bb.0: +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %1 = fcmp une double %x, %y + ret i1 %1 +} + +define i1 @doeq_mem(ptr %xp, ptr %yp) { +; X64-LABEL: doeq_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-LABEL: doeq_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: sete %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp oeq double %x, %y + ret i1 %1 +} + +define i1 @dune_mem(ptr %xp, ptr %yp) { +; X64-LABEL: dune_mem: +; X64: # %bb.0: +; X64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: vucomxsd (%rsi), %xmm0 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-LABEL: dune_mem: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: vucomxsd (%eax), %xmm0 +; X86-NEXT: setne %al +; X86-NEXT: retl + %x = load double, ptr %xp + %y = load double, ptr %yp + %1 = fcmp une double %x, %y + ret i1 %1 +} diff --git a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll index 7b81d547db085c..5f2bcf0556b021 100644 --- a/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2_512bf16-intrinsics.ll @@ -76,13 +76,15 @@ declare <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat>, i32) define i32 @test_int_x86_avx512_fpclass_nepbf16_512(<32 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vfpclasspbf16 $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x02] -; CHECK-NEXT: vfpclasspbf16 $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: vfpclasspbf16 $6, %zmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x48,0x66,0xc8,0x06] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) | isNegativeZero(zmm0) +; CHECK-NEXT: vfpclasspbf16 $0, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x49,0x66,0xc0,0x00] +; CHECK-NEXT: # k0 {%k1} = false ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 4) - %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 2) + %res = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 0) + %res1 = call <32 x i1> @llvm.x86.avx10.fpclass.nepbf16.512(<32 x bfloat> %x0, i32 6) %1 = and <32 x i1> %res1, %res %2 = bitcast <32 x i1> %1 to i32 ret i32 %2 diff --git a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll index 559d866b55cc7b..59151d4dd96099 100644 --- a/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx10_2bf16-intrinsics.ll @@ -298,6 +298,7 @@ define i8 @test_int_x86_avx512_fpclass_nepbf16_128(<8 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspbf16 $2, %xmm0, %k1 # encoding: [0x62,0xf3,0x7f,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspbf16 $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -313,6 +314,7 @@ define i16 @test_int_x86_avx512_fpclass_nepbf16_256(<16 x bfloat> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_nepbf16_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspbf16 $2, %ymm0, %k1 # encoding: [0x62,0xf3,0x7f,0x28,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(ymm0) ; CHECK-NEXT: vfpclasspbf16 $4, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7f,0x29,0x66,0xc0,0x04] ; CHECK-NEXT: kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll index 64063bdf8333eb..53193597d62f08 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-fast-isel.ll @@ -7,7 +7,7 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> %__A) { ; X86-LABEL: test_mm512_mask_fpclass_pd_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclasspd $4, %zmm0, %k0 +; X86-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -16,7 +16,7 @@ define zeroext i8 @test_mm512_mask_fpclass_pd_mask(i8 zeroext %__U, <8 x double> ; ; X64-LABEL: test_mm512_mask_fpclass_pd_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclasspd $4, %zmm0, %k0 +; X64-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -35,7 +35,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.pd.512(<8 x double>, i32) define zeroext i8 @test_mm512_fpclass_pd_mask(<8 x double> %__A) { ; CHECK-LABEL: test_mm512_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 +; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper @@ -49,7 +49,7 @@ entry: define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x float> %__A) { ; X86-LABEL: test_mm512_mask_fpclass_ps_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclassps $4, %zmm0, %k0 +; X86-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andw {{[0-9]+}}(%esp), %ax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -58,7 +58,7 @@ define zeroext i16 @test_mm512_mask_fpclass_ps_mask(i16 zeroext %__U, <16 x floa ; ; X64-LABEL: test_mm512_mask_fpclass_ps_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclassps $4, %zmm0, %k0 +; X64-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax @@ -77,7 +77,7 @@ declare <16 x i1> @llvm.x86.avx512.fpclass.ps.512(<16 x float>, i32) define zeroext i16 @test_mm512_fpclass_ps_mask(<16 x float> %__A) { ; CHECK-LABEL: test_mm512_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 +; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 # k0 = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper @@ -91,7 +91,7 @@ entry: define zeroext i8 @test_mm_fpclass_sd_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_sd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 +; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -107,7 +107,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__ ; X86-LABEL: test_mm_mask_fpclass_sd_mask: ; X86: # %bb.0: # %entry ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} +; X86-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl @@ -115,7 +115,7 @@ define zeroext i8 @test_mm_mask_fpclass_sd_mask(i8 zeroext %__U, <4 x float> %__ ; X64-LABEL: test_mm_mask_fpclass_sd_mask: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} +; X64-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -128,7 +128,7 @@ entry: define zeroext i8 @test_mm_fpclass_ss_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_ss_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 +; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -143,7 +143,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__ ; X86-LABEL: test_mm_mask_fpclass_ss_mask: ; X86: # %bb.0: # %entry ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 -; X86-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} +; X86-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl @@ -151,7 +151,7 @@ define zeroext i8 @test_mm_mask_fpclass_ss_mask(i8 zeroext %__U, <4 x float> %__ ; X64-LABEL: test_mm_mask_fpclass_ss_mask: ; X64: # %bb.0: # %entry ; X64-NEXT: kmovw %edi, %k1 -; X64-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} +; X64-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # k0 {%k1} = isPositiveZero(xmm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll index 23e929aa9d89b1..8a0428d022b6d7 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll @@ -654,7 +654,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_512(<8 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -669,7 +671,9 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll index 027bca9c8badf9..70f60c802a2d52 100644 --- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -726,7 +726,9 @@ define i8 @test_int_x86_avx512_fpclass_pd_512(<8 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %zmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclasspd $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -743,7 +745,9 @@ define i16@test_int_x86_avx512_fpclass_ps_512(<16 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $2, %zmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x48,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(zmm0) ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x66,0xc0,0x04] +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(zmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] @@ -761,7 +765,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_sd: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasssd $4, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclasssd $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x67,0xc0,0x02] +; CHECK-NEXT: # k0 {%k1} = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] @@ -775,6 +781,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vfpclasssd $4, (%eax), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x00,0x04] +; X86-NEXT: # k0 = isNegativeZero(mem) ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl # encoding: [0xc3] @@ -782,6 +789,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_sd_load(ptr %x0ptr) { ; X64-LABEL: test_int_x86_avx512_mask_fpclass_sd_load: ; X64: # %bb.0: ; X64-NEXT: vfpclasssd $4, (%rdi), %k0 # encoding: [0x62,0xf3,0xfd,0x08,0x67,0x07,0x04] +; X64-NEXT: # k0 = isNegativeZero(mem) ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq # encoding: [0xc3] @@ -796,7 +804,9 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ss: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassss $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassss $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x67,0xc0,0x02] +; CHECK-NEXT: # k0 {%k1} = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] @@ -810,6 +820,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) { ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: vfpclassss $4, (%eax), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x00,0x04] +; X86-NEXT: # k0 = isNegativeZero(mem) ; X86-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl # encoding: [0xc3] @@ -817,6 +828,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ss_load(ptr %x0ptr, i8 %x1) { ; X64-LABEL: test_int_x86_avx512_mask_fpclass_ss_load: ; X64: # %bb.0: ; X64-NEXT: vfpclassss $4, (%rdi), %k0 # encoding: [0x62,0xf3,0x7d,0x08,0x67,0x07,0x04] +; X64-NEXT: # k0 = isNegativeZero(mem) ; X64-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq # encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll index 703591acef5720..a8a38d9c481133 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-fast-isel.ll @@ -235,7 +235,7 @@ declare <2 x i1> @llvm.x86.avx512.fpclass.pd.128(<2 x double>, i32) define zeroext i8 @test_mm_fpclass_pd_mask(<2 x double> %__A) { ; CHECK-LABEL: test_mm_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 +; CHECK-NEXT: vfpclasspd $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -279,7 +279,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.pd.256(<4 x double>, i32) define zeroext i8 @test_mm256_fpclass_pd_mask(<4 x double> %__A) { ; CHECK-LABEL: test_mm256_fpclass_pd_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 +; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper @@ -322,7 +322,7 @@ declare <4 x i1> @llvm.x86.avx512.fpclass.ps.128(<4 x float>, i32) define zeroext i8 @test_mm_fpclass_ps_mask(<4 x float> %__A) { ; CHECK-LABEL: test_mm_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 +; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 # k0 = isPositiveZero(xmm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: ret{{[l|q]}} @@ -336,7 +336,7 @@ entry: define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> %__A) { ; X86-LABEL: test_mm256_mask_fpclass_ps_mask: ; X86: # %bb.0: # %entry -; X86-NEXT: vfpclassps $2, %ymm0, %k0 +; X86-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; X86-NEXT: kmovw %k0, %eax ; X86-NEXT: andb {{[0-9]+}}(%esp), %al ; X86-NEXT: # kill: def $al killed $al killed $eax @@ -345,7 +345,7 @@ define zeroext i8 @test_mm256_mask_fpclass_ps_mask(i8 zeroext %__U, <8 x float> ; ; X64-LABEL: test_mm256_mask_fpclass_ps_mask: ; X64: # %bb.0: # %entry -; X64-NEXT: vfpclassps $2, %ymm0, %k0 +; X64-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; X64-NEXT: kmovw %k0, %eax ; X64-NEXT: andb %dil, %al ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -364,7 +364,7 @@ declare <8 x i1> @llvm.x86.avx512.fpclass.ps.256(<8 x float>, i32) define zeroext i8 @test_mm256_fpclass_ps_mask(<8 x float> %__A) { ; CHECK-LABEL: test_mm256_fpclass_ps_mask: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 +; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 # k0 = isPositiveZero(ymm0) ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll index 8d609eb7fdd009..f31dafcd686269 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics-upgrade.ll @@ -2921,6 +2921,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_128(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2936,6 +2937,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_ps_256(<8 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2952,6 +2954,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_128(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -2967,6 +2970,7 @@ define i8 @test_int_x86_avx512_mask_fpclass_pd_256(<4 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 3b9f96ef452c32..ec94b593148dfa 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1500,6 +1500,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_128(<4 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(xmm0) ; CHECK-NEXT: vfpclassps $2, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1518,6 +1519,7 @@ define i8 @test_int_x86_avx512_fpclass_ps_256(<8 x float> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_ps_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclassps $4, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclassps $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1536,6 +1538,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_128(<2 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_128: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $2, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x66,0xc8,0x02] +; CHECK-NEXT: # k1 = isPositiveZero(xmm0) ; CHECK-NEXT: vfpclasspd $4, %xmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x66,0xc0,0x04] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -1554,6 +1557,7 @@ define i8 @test_int_x86_avx512_fpclass_pd_256(<4 x double> %x0) { ; CHECK-LABEL: test_int_x86_avx512_fpclass_pd_256: ; CHECK: # %bb.0: ; CHECK-NEXT: vfpclasspd $4, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x66,0xc8,0x04] +; CHECK-NEXT: # k1 = isNegativeZero(ymm0) ; CHECK-NEXT: vfpclasspd $2, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x66,0xc0,0x02] ; CHECK-NEXT: kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0] ; CHECK-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll index ed7109c416e7fd..9382ba31ab649d 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll @@ -234,6 +234,7 @@ define i32 @stack_fold_fpclassph(<32 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclassphz $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 64-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -253,6 +254,7 @@ define i32 @stack_fold_fpclassph_mask(<32 x half> %a0, ptr %p) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovd (%rdi), %k1 ; CHECK-NEXT: vfpclassphz $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 64-byte Folded Reload +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq @@ -272,6 +274,7 @@ define i8 @stack_fold_fpclasssh(<8 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -290,6 +293,7 @@ define i8 @stack_fold_fpclasssh_mask(<8 x half> %a0, ptr %p) { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: kmovb (%rdi), %k1 ; CHECK-NEXT: vfpclasssh $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 {%k1} # 16-byte Folded Reload +; CHECK-NEXT: # k0 {%k1} = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll index e2ed997783f59b..3386f4a9b51981 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll +++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16vl.ll @@ -110,6 +110,7 @@ define i8 @stack_fold_fpclassph(<8 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclassphx $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 16-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -148,6 +149,7 @@ define i16 @stack_fold_fpclassph_ymm(<16 x half> %a0) { ; CHECK-NEXT: nop ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vfpclassphy $4, {{[-0-9]+}}(%r{{[sb]}}p), %k0 # 32-byte Folded Reload +; CHECK-NEXT: # k0 = isNegativeZero(mem) ; CHECK-NEXT: kmovd %k0, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: vzeroupper diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s new file mode 100644 index 00000000000000..231d4cd9967a49 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening-diagnostics.s @@ -0,0 +1,220 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 < %s 2>&1 | FileCheck %s + +// BFMOP4A + +// Single vectors + +bfmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4a za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4a za0.h, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4a za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4a za4.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4a za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4a za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.s, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z0.h, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +bfmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.s, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.h..z14.h + +bfmop4s za0.h, z0.h, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +bfmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.s-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +bfmop4s za0.h, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +bfmop4s za0.h, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +bfmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s + +bfmop4s za4.h, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.s-z1.s}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +bfmop4s za0.h, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +bfmop4s za0.h, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s new file mode 100644 index 00000000000000..b98bb99def0569 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/bfmop4as-non-widening.s @@ -0,0 +1,178 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-b16b16 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-b16b16 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-b16b16 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +// BFMOP4A + +// Single vectors + +bfmop4a za0.h, z0.h, z16.h // 10000001-00100000-00000000-00001000 +// CHECK-INST: bfmop4a za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x08,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200008 + +bfmop4a za1.h, z12.h, z24.h // 10000001-00101000-00000001-10001001 +// CHECK-INST: bfmop4a za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x89,0x01,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280189 + +bfmop4a za1.h, z14.h, z30.h // 10000001-00101110-00000001-11001001 +// CHECK-INST: bfmop4a za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xc9,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e01c9 + +// Single and multiple vectors + +bfmop4a za0.h, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00001000 +// CHECK-INST: bfmop4a za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x08,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300008 + +bfmop4a za1.h, z12.h, {z24.h-z25.h} // 10000001-00111000-00000001-10001001 +// CHECK-INST: bfmop4a za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x01,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380189 + +bfmop4a za1.h, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11001001 +// CHECK-INST: bfmop4a za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e01c9 + +// Multiple and single vectors + +bfmop4a za0.h, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00001000 +// CHECK-INST: bfmop4a za0.h, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x08,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200208 + +bfmop4a za1.h, {z12.h-z13.h}, z24.h // 10000001-00101000-00000011-10001001 +// CHECK-INST: bfmop4a za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x89,0x03,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280389 + +bfmop4a za1.h, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11001001 +// CHECK-INST: bfmop4a za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc9,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e03c9 + +// Multiple vectors + +bfmop4a za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00001000 +// CHECK-INST: bfmop4a za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x08,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300208 + +bfmop4a za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00111000-00000011-10001001 +// CHECK-INST: bfmop4a za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x89,0x03,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380389 + +bfmop4a za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11001001 +// CHECK-INST: bfmop4a za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e03c9 + + +// BFMOP4S + +// Single vectors + +bfmop4s za0.h, z0.h, z16.h // 10000001-00100000-00000000-00011000 +// CHECK-INST: bfmop4s za0.h, z0.h, z16.h +// CHECK-ENCODING: [0x18,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200018 + +bfmop4s za1.h, z12.h, z24.h // 10000001-00101000-00000001-10011001 +// CHECK-INST: bfmop4s za1.h, z12.h, z24.h +// CHECK-ENCODING: [0x99,0x01,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280199 + +bfmop4s za1.h, z14.h, z30.h // 10000001-00101110-00000001-11011001 +// CHECK-INST: bfmop4s za1.h, z14.h, z30.h +// CHECK-ENCODING: [0xd9,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e01d9 + +// Single and multiple vectors + +bfmop4s za0.h, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00011000 +// CHECK-INST: bfmop4s za0.h, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300018 + +bfmop4s za1.h, z12.h, {z24.h-z25.h} // 10000001-00111000-00000001-10011001 +// CHECK-INST: bfmop4s za1.h, z12.h, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x01,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380199 + +bfmop4s za1.h, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11011001 +// CHECK-INST: bfmop4s za1.h, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e01d9 + +// Multiple and single vectors + +bfmop4s za0.h, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00011000 +// CHECK-INST: bfmop4s za0.h, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x18,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81200218 + +bfmop4s za1.h, {z12.h-z13.h}, z24.h // 10000001-00101000-00000011-10011001 +// CHECK-INST: bfmop4s za1.h, { z12.h, z13.h }, z24.h +// CHECK-ENCODING: [0x99,0x03,0x28,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81280399 + +bfmop4s za1.h, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11011001 +// CHECK-INST: bfmop4s za1.h, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd9,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 812e03d9 + +// Multiple vectors + +bfmop4s za0.h, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00011000 +// CHECK-INST: bfmop4s za0.h, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x18,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81300218 + +bfmop4s za1.h, {z12.h-z13.h}, {z24.h-z25.h} // 10000001-00111000-00000011-10011001 +// CHECK-INST: bfmop4s za1.h, { z12.h, z13.h }, { z24.h, z25.h } +// CHECK-ENCODING: [0x99,0x03,0x38,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 81380399 + +bfmop4s za1.h, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11011001 +// CHECK-INST: bfmop4s za1.h, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 sme-b16b16 +// CHECK-UNKNOWN: 813e03d9 diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s new file mode 100644 index 00000000000000..457add20355e89 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening-diagnostics.s @@ -0,0 +1,243 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z0.h, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4a za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4a za0.s, z0.h, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, z0.h, {z16.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za4.s, {z0.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4a za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4a za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.d}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.h-z1.h}, {z18.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// FMOP4S + +// Single vectors + +fmop4a za0.d, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z15.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z16.h, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z0.h, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z14.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, z12.h, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Single and multiple vectors + +fmop4s za0.d, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z1.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z16.h, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register + +fmop4s za0.s, z0.h, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.h, {z17.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, z0.h, {z16.h-z18.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.h, {z12.h-z13.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.d, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4s za0.s, {z1.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z2.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z16.h-z17.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, {z0.h-z1.h}, z17.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +fmop4s za0.s, {z0.h-z1.h}, z12.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.h..z30.h + +// Multiple vectors + +fmop4s za0.d, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.h-z1.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.d}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z1.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z2.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z18.h-z19.h}, {z16.h-z17.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.h-z1.h}, {z19.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.h-z1.h}, {z18.h-z20.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.h-z1.h}, {z10.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s new file mode 100644 index 00000000000000..d615fb85b4fd7e --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp16-fp32-widening.s @@ -0,0 +1,177 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors +fmop4a za0.s, z0.h, z16.h // 10000001-00100000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x00,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200000 + +fmop4a za1.s, z10.h, z20.h // 10000001-00100100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x41,0x01,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240141 + +fmop4a za3.s, z14.h, z30.h // 10000001-00101110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xc3,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e01c3 + +// Single and multiple vectors + +fmop4a za0.s, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300000 + +fmop4a za1.s, z10.h, {z20.h-z21.h} // 10000001-00110100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.h, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x01,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340141 + +fmop4a za3.s, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e01c3 + +// Multiple and single vectors + +fmop4a za0.s, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x00,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200200 + +fmop4a za1.s, {z10.h-z11.h}, z20.h // 10000001-00100100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.h, z11.h }, z20.h +// CHECK-ENCODING: [0x41,0x03,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240341 + +fmop4a za3.s, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xc3,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e03c3 + +// Multiple vectors + +fmop4a za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x00,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300200 + +fmop4a za1.s, {z10.h-z11.h}, {z20.h-z21.h} // 10000001-00110100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x03,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340341 + +fmop4a za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc3,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e03c3 + +// FMOP4S + +// Single vectors +fmop4s za0.s, z0.h, z16.h // 10000001-00100000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.h, z16.h +// CHECK-ENCODING: [0x10,0x00,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200010 + +fmop4s za1.s, z10.h, z20.h // 10000001-00100100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.h, z20.h +// CHECK-ENCODING: [0x51,0x01,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240151 + +fmop4s za3.s, z14.h, z30.h // 10000001-00101110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.h, z30.h +// CHECK-ENCODING: [0xd3,0x01,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e01d3 + +// Single and multiple vectors + +fmop4s za0.s, z0.h, {z16.h-z17.h} // 10000001-00110000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.h, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x00,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300010 + +fmop4s za1.s, z10.h, {z20.h-z21.h} // 10000001-00110100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.h, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x01,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340151 + +fmop4s za3.s, z14.h, {z30.h-z31.h} // 10000001-00111110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.h, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x01,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e01d3 + +// Multiple and single vectors + +fmop4s za0.s, {z0.h-z1.h}, z16.h // 10000001-00100000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.h, z1.h }, z16.h +// CHECK-ENCODING: [0x10,0x02,0x20,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81200210 + +fmop4s za1.s, {z10.h-z11.h}, z20.h // 10000001-00100100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.h, z11.h }, z20.h +// CHECK-ENCODING: [0x51,0x03,0x24,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81240351 + +fmop4s za3.s, {z14.h-z15.h}, z30.h // 10000001-00101110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.h, z15.h }, z30.h +// CHECK-ENCODING: [0xd3,0x03,0x2e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 812e03d3 + +// Multiple vectors + +fmop4s za0.s, {z0.h-z1.h}, {z16.h-z17.h} // 10000001-00110000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.h, z1.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x10,0x02,0x30,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81300210 + +fmop4s za1.s, {z10.h-z11.h}, {z20.h-z21.h} // 10000001-00110100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x03,0x34,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 81340351 + +fmop4s za3.s, {z14.h-z15.h}, {z30.h-z31.h} // 10000001-00111110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.h, z15.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd3,0x03,0x3e,0x81] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 813e03d3 diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s new file mode 100644 index 00000000000000..c9c59128f42060 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening-diagnostics.s @@ -0,0 +1,245 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z15.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z16.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z14.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, z12.s, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Single and multiple vectors + +fmop4a za0.d, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z1.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z16.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za0.s, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.s, {z17.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, z0.s, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, z0.s, {z12.s-z13.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.d, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4a za0.s, {z1.s-z2.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z2.s-z4.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z16.s-z17.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, {z0.s-z1.s}, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4a za0.s, {z0.s-z1.s}, z12.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Multiple vectors + +fmop4a za0.d, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4a za4.s, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z1.s-z2.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z2.s-z4.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z18.s-z19.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.s}, {z19.s-z20.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.s, {z0.s-z1.s}, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.s, {z0.s-z1.s}, {z10.s-z11.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + + +// FMOP4S + +// Single vectors + +fmop4s za0.d, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z15.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z16.s, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z14.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, z12.s, z31.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Single and multiple vectors + +fmop4s za0.d, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, z0.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z1.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z16.s, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za0.s, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.s, {z17.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, z0.s, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, z0.s, {z12.s-z13.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.d, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.s-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: mismatched register size suffix + +fmop4s za0.s, {z1.s-z2.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z2.s-z4.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z16.s-z17.s}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, {z0.s-z1.s}, z17.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +fmop4s za0.s, {z0.s-z1.s}, z12.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.s..z30.s + +// Multiple vectors + +fmop4s za0.d, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand + +fmop4s za4.s, {z0.s-z1.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z1.s-z2.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z2.s-z4.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z18.s-z19.s}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.s-z1.s}, {z19.s-z20.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.s, {z0.s-z1.s}, {z16.s-z18.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.s, {z0.s-z1.s}, {z10.s-z11.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s new file mode 100644 index 00000000000000..e65def17cd1b3d --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp32-non-widening.s @@ -0,0 +1,179 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors + +fmop4a za0.s, z0.s, z16.s // 10000000-00000000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.s, z16.s +// CHECK-ENCODING: [0x00,0x00,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000000 + +fmop4a za3.s, z12.s, z24.s // 10000000-00001000-00000001-10000011 +// CHECK-INST: fmop4a za3.s, z12.s, z24.s +// CHECK-ENCODING: [0x83,0x01,0x08,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80080183 + +fmop4a za3.s, z14.s, z30.s // 10000000-00001110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.s, z30.s +// CHECK-ENCODING: [0xc3,0x01,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e01c3 + +// Single and multiple vectors + +fmop4a za0.s, z0.s, {z16.s-z17.s} // 10000000-00010000-00000000-00000000 +// CHECK-INST: fmop4a za0.s, z0.s, { z16.s, z17.s } +// CHECK-ENCODING: [0x00,0x00,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100000 + +fmop4a za1.s, z10.s, {z20.s-z21.s} // 10000000-00010100-00000001-01000001 +// CHECK-INST: fmop4a za1.s, z10.s, { z20.s, z21.s } +// CHECK-ENCODING: [0x41,0x01,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140141 + +fmop4a za3.s, z14.s, {z30.s-z31.s} // 10000000-00011110-00000001-11000011 +// CHECK-INST: fmop4a za3.s, z14.s, { z30.s, z31.s } +// CHECK-ENCODING: [0xc3,0x01,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e01c3 + +// Multiple and single vectors + +fmop4a za0.s, {z0.s-z1.s}, z16.s // 10000000-00000000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.s, z1.s }, z16.s +// CHECK-ENCODING: [0x00,0x02,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000200 + +fmop4a za1.s, {z10.s-z11.s}, z20.s // 10000000-00000100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.s, z11.s }, z20.s +// CHECK-ENCODING: [0x41,0x03,0x04,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80040341 + +fmop4a za3.s, {z14.s-z15.s}, z30.s // 10000000-00001110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.s, z15.s }, z30.s +// CHECK-ENCODING: [0xc3,0x03,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e03c3 + +// Multiple vectors + +fmop4a za0.s, {z0.s-z1.s}, {z16.s-z17.s} // 10000000-00010000-00000010-00000000 +// CHECK-INST: fmop4a za0.s, { z0.s, z1.s }, { z16.s, z17.s } +// CHECK-ENCODING: [0x00,0x02,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100200 + +fmop4a za1.s, {z10.s-z11.s}, {z20.s-z21.s} // 10000000-00010100-00000011-01000001 +// CHECK-INST: fmop4a za1.s, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x41,0x03,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140341 + +fmop4a za3.s, {z14.s-z15.s}, {z30.s-z31.s} // 10000000-00011110-00000011-11000011 +// CHECK-INST: fmop4a za3.s, { z14.s, z15.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xc3,0x03,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e03c3 + +// FMOP4S + +// Single vectors + +fmop4s za0.s, z0.s, z16.s // 10000000-00000000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.s, z16.s +// CHECK-ENCODING: [0x10,0x00,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000010 + +fmop4s za3.s, z12.s, z24.s // 10000000-00001000-00000001-10010011 +// CHECK-INST: fmop4s za3.s, z12.s, z24.s +// CHECK-ENCODING: [0x93,0x01,0x08,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80080193 + +fmop4s za3.s, z14.s, z30.s // 10000000-00001110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.s, z30.s +// CHECK-ENCODING: [0xd3,0x01,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e01d3 + +// Single and multiple vectors + +fmop4s za0.s, z0.s, {z16.s-z17.s} // 10000000-00010000-00000000-00010000 +// CHECK-INST: fmop4s za0.s, z0.s, { z16.s, z17.s } +// CHECK-ENCODING: [0x10,0x00,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100010 + +fmop4s za1.s, z10.s, {z20.s-z21.s} // 10000000-00010100-00000001-01010001 +// CHECK-INST: fmop4s za1.s, z10.s, { z20.s, z21.s } +// CHECK-ENCODING: [0x51,0x01,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140151 + +fmop4s za3.s, z14.s, {z30.s-z31.s} // 10000000-00011110-00000001-11010011 +// CHECK-INST: fmop4s za3.s, z14.s, { z30.s, z31.s } +// CHECK-ENCODING: [0xd3,0x01,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e01d3 + +// Multiple and single vectors + +fmop4s za0.s, {z0.s-z1.s}, z16.s // 10000000-00000000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.s, z1.s }, z16.s +// CHECK-ENCODING: [0x10,0x02,0x00,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80000210 + +fmop4s za1.s, {z10.s-z11.s}, z20.s // 10000000-00000100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.s, z11.s }, z20.s +// CHECK-ENCODING: [0x51,0x03,0x04,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80040351 + +fmop4s za3.s, {z14.s-z15.s}, z30.s // 10000000-00001110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.s, z15.s }, z30.s +// CHECK-ENCODING: [0xd3,0x03,0x0e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 800e03d3 + +// Multiple vectors + +fmop4s za0.s, {z0.s-z1.s}, {z16.s-z17.s} // 10000000-00010000-00000010-00010000 +// CHECK-INST: fmop4s za0.s, { z0.s, z1.s }, { z16.s, z17.s } +// CHECK-ENCODING: [0x10,0x02,0x10,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80100210 + +fmop4s za1.s, {z10.s-z11.s}, {z20.s-z21.s} // 10000000-00010100-00000011-01010001 +// CHECK-INST: fmop4s za1.s, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x51,0x03,0x14,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 80140351 + +fmop4s za3.s, {z14.s-z15.s}, {z30.s-z31.s} // 10000000-00011110-00000011-11010011 +// CHECK-INST: fmop4s za3.s, { z14.s, z15.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xd3,0x03,0x1e,0x80] +// CHECK-ERROR: instruction requires: sme2p2 +// CHECK-UNKNOWN: 801e03d3 diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s new file mode 100644 index 00000000000000..ff9602bc12afc2 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening-diagnostics.s @@ -0,0 +1,243 @@ +// RUN: not llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f64f64 < %s 2>&1 | FileCheck %s + +// FMOP4A + +// Single vectors + +fmop4a za0.s, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za8.d, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z15.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z16.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, z12.d, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Single and multiple vectors + +fmop4a za0.s, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4a za8.d, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z1.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z16.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4a za0.d, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.d, {z17.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, z0.d, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, z0.d, {z12.d-z13.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4a za0.s, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za8.d, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z1.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z16.d-z17.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, {z0.d-z1.d}, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4a za0.d, {z0.d-z1.d}, z12.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Multiple vectors + +fmop4a za0.s, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za8.d, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z1.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z18.d-z19.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.d-z1.d}, {z19.d-z20.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4a za0.d, {z0.d-z1.d}, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4a za0.d, {z0.d-z1.d}, {z10.d-z11.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// FMOP4S + +// Single vectors + +fmop4s za0.s, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za8.d, z0.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.s, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z15.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z16.d, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z0.d, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, z12.d, z31.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Single and multiple vectors + +fmop4s za0.s, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.s..z14.s + +fmop4s za8.d, z0.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.s, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z1.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z16.d, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z0.d..z14.d + +fmop4s za0.d, z0.d, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.d, {z17.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, z0.d, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, z0.d, {z12.d-z13.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +// Multiple and single vectors + +fmop4s za0.s, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za8.d, {z0.d-z1.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.s-z1.s}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z1.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z2.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z16.d-z17.d}, z16.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, z16.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, {z0.d-z1.d}, z17.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +fmop4s za0.d, {z0.d-z1.d}, z12.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected even register in z16.d..z30.d + +// Multiple vectors + +fmop4s za0.s, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za8.d, {z0.d-z1.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.s-z1.s}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z1.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z2.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z18.d-z19.d}, {z16.d-z17.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z0-z14, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, {z16.s-z17.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.d-z1.d}, {z19.d-z20.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types + +fmop4s za0.d, {z0.d-z1.d}, {z16.d-z18.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction + +fmop4s za0.d, {z0.d-z1.d}, {z10.d-z11.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors in the range z16-z30, where the first vector is a multiple of 2 and with matching element types diff --git a/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s new file mode 100644 index 00000000000000..b0ad2984ad5acd --- /dev/null +++ b/llvm/test/MC/AArch64/SME2p2/fmop4as-fp64-non-widening.s @@ -0,0 +1,180 @@ + +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2p2,+sme-f64f64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2,+sme-f64f64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2p2,+sme-f64f64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +// FMOP4A + +// Single vectors + +fmop4a za0.d, z0.d, z16.d // 10000000-11000000-00000000-00001000 +// CHECK-INST: fmop4a za0.d, z0.d, z16.d +// CHECK-ENCODING: [0x08,0x00,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00008 + +fmop4a za5.d, z10.d, z20.d // 10000000-11000100-00000001-01001101 +// CHECK-INST: fmop4a za5.d, z10.d, z20.d +// CHECK-ENCODING: [0x4d,0x01,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4014d + +fmop4a za7.d, z14.d, z30.d // 10000000-11001110-00000001-11001111 +// CHECK-INST: fmop4a za7.d, z14.d, z30.d +// CHECK-ENCODING: [0xcf,0x01,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce01cf + +// Single and multiple vectors + +fmop4a za0.d, z0.d, {z16.d-z17.d} // 10000000-11010000-00000000-00001000 +// CHECK-INST: fmop4a za0.d, z0.d, { z16.d, z17.d } +// CHECK-ENCODING: [0x08,0x00,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00008 + +fmop4a za5.d, z10.d, {z20.d-z21.d} // 10000000-11010100-00000001-01001101 +// CHECK-INST: fmop4a za5.d, z10.d, { z20.d, z21.d } +// CHECK-ENCODING: [0x4d,0x01,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4014d + +fmop4a za7.d, z14.d, {z30.d-z31.d} // 10000000-11011110-00000001-11001111 +// CHECK-INST: fmop4a za7.d, z14.d, { z30.d, z31.d } +// CHECK-ENCODING: [0xcf,0x01,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de01cf + +// Multiple and single vectors + +fmop4a za0.d, {z0.d-z1.d}, z16.d // 10000000-11000000-00000010-00001000 +// CHECK-INST: fmop4a za0.d, { z0.d, z1.d }, z16.d +// CHECK-ENCODING: [0x08,0x02,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00208 + +fmop4a za5.d, {z10.d-z11.d}, z20.d // 10000000-11000100-00000011-01001101 +// CHECK-INST: fmop4a za5.d, { z10.d, z11.d }, z20.d +// CHECK-ENCODING: [0x4d,0x03,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4034d + +fmop4a za7.d, {z14.d-z15.d}, z30.d // 10000000-11001110-00000011-11001111 +// CHECK-INST: fmop4a za7.d, { z14.d, z15.d }, z30.d +// CHECK-ENCODING: [0xcf,0x03,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce03cf + +// Multiple vectors + +fmop4a za0.d, {z0.d-z1.d}, {z16.d-z17.d} // 10000000-11010000-00000010-00001000 +// CHECK-INST: fmop4a za0.d, { z0.d, z1.d }, { z16.d, z17.d } +// CHECK-ENCODING: [0x08,0x02,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00208 + +fmop4a za5.d, {z10.d-z11.d}, {z20.d-z21.d} // 10000000-11010100-00000011-01001101 +// CHECK-INST: fmop4a za5.d, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x4d,0x03,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4034d + +fmop4a za7.d, {z14.d-z15.d}, {z30.d-z31.d} // 10000000-11011110-00000011-11001111 +// CHECK-INST: fmop4a za7.d, { z14.d, z15.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xcf,0x03,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de03cf + + +// FMOP4S + +// Single vectors + +fmop4s za0.d, z0.d, z16.d // 10000000-11000000-00000000-00011000 +// CHECK-INST: fmop4s za0.d, z0.d, z16.d +// CHECK-ENCODING: [0x18,0x00,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00018 + +fmop4s za5.d, z10.d, z20.d // 10000000-11000100-00000001-01011101 +// CHECK-INST: fmop4s za5.d, z10.d, z20.d +// CHECK-ENCODING: [0x5d,0x01,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4015d + +fmop4s za7.d, z14.d, z30.d // 10000000-11001110-00000001-11011111 +// CHECK-INST: fmop4s za7.d, z14.d, z30.d +// CHECK-ENCODING: [0xdf,0x01,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce01df + +// Single and multiple vectors + +fmop4s za0.d, z0.d, {z16.d-z17.d} // 10000000-11010000-00000000-00011000 +// CHECK-INST: fmop4s za0.d, z0.d, { z16.d, z17.d } +// CHECK-ENCODING: [0x18,0x00,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00018 + +fmop4s za5.d, z10.d, {z20.d-z21.d} // 10000000-11010100-00000001-01011101 +// CHECK-INST: fmop4s za5.d, z10.d, { z20.d, z21.d } +// CHECK-ENCODING: [0x5d,0x01,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4015d + +fmop4s za7.d, z14.d, {z30.d-z31.d} // 10000000-11011110-00000001-11011111 +// CHECK-INST: fmop4s za7.d, z14.d, { z30.d, z31.d } +// CHECK-ENCODING: [0xdf,0x01,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de01df + +// Multiple and single vectors + +fmop4s za0.d, {z0.d-z1.d}, z16.d // 10000000-11000000-00000010-00011000 +// CHECK-INST: fmop4s za0.d, { z0.d, z1.d }, z16.d +// CHECK-ENCODING: [0x18,0x02,0xc0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c00218 + +fmop4s za5.d, {z10.d-z11.d}, z20.d // 10000000-11000100-00000011-01011101 +// CHECK-INST: fmop4s za5.d, { z10.d, z11.d }, z20.d +// CHECK-ENCODING: [0x5d,0x03,0xc4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80c4035d + +fmop4s za7.d, {z14.d-z15.d}, z30.d // 10000000-11001110-00000011-11011111 +// CHECK-INST: fmop4s za7.d, { z14.d, z15.d }, z30.d +// CHECK-ENCODING: [0xdf,0x03,0xce,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80ce03df + +// Multiple vectors + +fmop4s za0.d, {z0.d-z1.d}, {z16.d-z17.d} // 10000000-11010000-00000010-00011000 +// CHECK-INST: fmop4s za0.d, { z0.d, z1.d }, { z16.d, z17.d } +// CHECK-ENCODING: [0x18,0x02,0xd0,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d00218 + +fmop4s za5.d, {z10.d-z11.d}, {z20.d-z21.d} // 10000000-11010100-00000011-01011101 +// CHECK-INST: fmop4s za5.d, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x5d,0x03,0xd4,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80d4035d + +fmop4s za7.d, {z14.d-z15.d}, {z30.d-z31.d} // 10000000-11011110-00000011-11011111 +// CHECK-INST: fmop4s za7.d, { z14.d, z15.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xdf,0x03,0xde,0x80] +// CHECK-ERROR: instruction requires: sme2p2 sme-f64f64 +// CHECK-UNKNOWN: 80de03df diff --git a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s index a3d86267d917b5..b8ff8cc46201f2 100644 --- a/llvm/test/MC/AArch64/SVE/compact-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/compact-diagnostics.s @@ -28,12 +28,12 @@ compact z31.s, p7, z31.d // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: compact z31.b, p7, z31.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: compact z31.b, p7, z31.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: compact z31.h, p7, z31.h -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction requires: sme2p2 or sve2p2 // CHECK-NEXT: compact z31.h, p7, z31.h // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/compact.s b/llvm/test/MC/AArch64/SVE/compact.s index ff815980781d79..a9b47dea246bee 100644 --- a/llvm/test/MC/AArch64/SVE/compact.s +++ b/llvm/test/MC/AArch64/SVE/compact.s @@ -12,11 +12,11 @@ compact z31.s, p7, z31.s // CHECK-INST: compact z31.s, p7, z31.s // CHECK-ENCODING: [0xff,0x9f,0xa1,0x05] -// CHECK-ERROR: instruction requires: sve +// CHECK-ERROR: instruction requires: sve or sme2p2 // CHECK-UNKNOWN: 05a19fff compact z31.d, p7, z31.d // CHECK-INST: compact z31.d, p7, z31.d // CHECK-ENCODING: [0xff,0x9f,0xe1,0x05] -// CHECK-ERROR: instruction requires: sve +// CHECK-ERROR: instruction requires: sve or sme2p2 // CHECK-UNKNOWN: 05e19fff diff --git a/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s new file mode 100644 index 00000000000000..acf00e7f7a600f --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/compact-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid element widths + +compact z31.h, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: compact z31.h, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z31.b, p7, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: compact z31.b, p7, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate operation + +compact z23.b, p7/m, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: compact z23.b, p7/m, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.b, p7.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.b, p7.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p7/z, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: compact z23.h, p7/z, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p7.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.h, p7.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +compact z23.b, p8, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.b, p8, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +compact z23.h, p8, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: compact z23.h, p8, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z31.b, p7/z, z6.b +compact z31.b, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: compact z31.b, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z31, z6 +compact z31.h, p7, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: compact z31.h, p7, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/compact.s b/llvm/test/MC/AArch64/SVE2p2/compact.s new file mode 100644 index 00000000000000..0170b3832bea67 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/compact.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +compact z0.b, p0, z0.b // 00000101-00100001-10000000-00000000 +// CHECK-INST: compact z0.b, p0, z0.b +// CHECK-ENCODING: [0x00,0x80,0x21,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05218000 + +compact z21.b, p5, z10.b // 00000101-00100001-10010101-01010101 +// CHECK-INST: compact z21.b, p5, z10.b +// CHECK-ENCODING: [0x55,0x95,0x21,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05219555 + +compact z31.h, p7, z31.h // 00000101-01100001-10011111-11111111 +// CHECK-INST: compact z31.h, p7, z31.h +// CHECK-ENCODING: [0xff,0x9f,0x61,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05619fff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s new file mode 100644 index 00000000000000..b9a95f399a168a --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/expand-diagnostics.s @@ -0,0 +1,120 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid element widths. + +expand z23.b, p3, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.b, p3, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.h, p3, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, p3, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.s, p3, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.d, p3, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.q, p3, z13.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: expand z23.q, p3, z13.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate operation + +expand z23.b, p3/z, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.b, p3/z, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.b, p3.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p3.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3/m, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.h, p3/m, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p3.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, p3/z, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.s, p3/z, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.s, p3.s, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.s, p3.s, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3/m, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: expand z23.d, p3/m, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p3.d, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.d, p3.d, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Predicate not in restricted predicate range + +expand z23.b, p8, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p8, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.b, p3.b, z13.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.b, p3.b, z13.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p8, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p8, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.h, p3.h, z13.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.h, p3.h, z13.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}} + +expand z23.s, p8, z13.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.s, p8, z13.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +expand z23.d, p8, z13.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: expand z23.d, p8, z13.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Negative tests for instructions that are incompatible with movprfx + +movprfx z31, z6 +expand z31.b, p7, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: expand z31.b, p7, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movprfx z31.b, p0/z, z6.b +expand z31.b, p0, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov +// CHECK-NEXT: expand z31.b, p0, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/expand.s b/llvm/test/MC/AArch64/SVE2p2/expand.s new file mode 100644 index 00000000000000..7523978380fbd7 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/expand.s @@ -0,0 +1,39 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sve2p2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +expand z0.b, p0, z0.b // 00000101-00110001-10000000-00000000 +// CHECK-INST: expand z0.b, p0, z0.b +// CHECK-ENCODING: [0x00,0x80,0x31,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05318000 + +expand z21.h, p5, z10.h // 00000101-01110001-10010101-01010101 +// CHECK-INST: expand z21.h, p5, z10.h +// CHECK-ENCODING: [0x55,0x95,0x71,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05719555 + +expand z23.s, p3, z13.s // 00000101-10110001-10001101-10110111 +// CHECK-INST: expand z23.s, p3, z13.s +// CHECK-ENCODING: [0xb7,0x8d,0xb1,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05b18db7 + +expand z31.d, p7, z31.d // 00000101-11110001-10011111-11111111 +// CHECK-INST: expand z31.d, p7, z31.d +// CHECK-ENCODING: [0xff,0x9f,0xf1,0x05] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 05f19fff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s new file mode 100644 index 00000000000000..4309fd49ecf79f --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/firstp-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid predicate operand + +firstp x0, p15, p0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15, p0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp x0, p15.b, p0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15.b, p0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp x0, p15.q, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: firstp x0, p15.q, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid register types + +firstp sp, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: firstp sp, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +firstp w0, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: firstp w0, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p2/firstp.s b/llvm/test/MC/AArch64/SVE2p2/firstp.s new file mode 100644 index 00000000000000..629bee5576fc7d --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/firstp.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +firstp x0, p0, p0.b // 00100101-00100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.b +// CHECK-ENCODING: [0x00,0x80,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25218000 + +firstp x23, p11, p13.b // 00100101-00100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.b +// CHECK-ENCODING: [0xb7,0xad,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2521adb7 + +firstp xzr, p15, p15.b // 00100101-00100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.b +// CHECK-ENCODING: [0xff,0xbd,0x21,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2521bdff + +firstp x0, p0, p0.h // 00100101-01100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.h +// CHECK-ENCODING: [0x00,0x80,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25618000 + +firstp x23, p11, p13.h // 00100101-01100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.h +// CHECK-ENCODING: [0xb7,0xad,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2561adb7 + +firstp xzr, p15, p15.h // 00100101-01100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.h +// CHECK-ENCODING: [0xff,0xbd,0x61,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2561bdff + +firstp x0, p0, p0.s // 00100101-10100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.s +// CHECK-ENCODING: [0x00,0x80,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a18000 + +firstp x23, p11, p13.s // 00100101-10100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.s +// CHECK-ENCODING: [0xb7,0xad,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a1adb7 + +firstp xzr, p15, p15.s // 00100101-10100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.s +// CHECK-ENCODING: [0xff,0xbd,0xa1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a1bdff + +firstp x0, p0, p0.d // 00100101-11100001-10000000-00000000 +// CHECK-INST: firstp x0, p0, p0.d +// CHECK-ENCODING: [0x00,0x80,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e18000 + +firstp x23, p11, p13.d // 00100101-11100001-10101101-10110111 +// CHECK-INST: firstp x23, p11, p13.d +// CHECK-ENCODING: [0xb7,0xad,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e1adb7 + +firstp xzr, p15, p15.d // 00100101-11100001-10111101-11111111 +// CHECK-INST: firstp xzr, p15, p15.d +// CHECK-ENCODING: [0xff,0xbd,0xe1,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e1bdff \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s new file mode 100644 index 00000000000000..e277bdbc6aa8b3 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/lastp-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 2>&1 < %s| FileCheck %s + +// ------------------------------------------------------------------------- // +// Invalid predicate operand + +lastp x0, p15, p0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: lastp x0, p15, p0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp x0, p15.b, p0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: lastp x0, p15.b, p0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp x0, p15.q, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid predicate register. +// CHECK-NEXT: lastp x0, p15.q, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// ------------------------------------------------------------------------- // +// Invalid register types + +lastp sp, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: lastp sp, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +lastp w0, p15, p0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: lastp w0, p15, p0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: \ No newline at end of file diff --git a/llvm/test/MC/AArch64/SVE2p2/lastp.s b/llvm/test/MC/AArch64/SVE2p2/lastp.s new file mode 100644 index 00000000000000..1ffa0a7d1fcc19 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p2/lastp.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2p2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sve2p2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p2 < %s \ +// RUN: | llvm-objdump -d --mattr=-sme2 - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// Disassemble encoding and check the re-encoding (-show-encoding) matches. +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sve2p2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +lastp x0, p0, p0.b // 00100101-00100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.b +// CHECK-ENCODING: [0x00,0x80,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25228000 + +lastp x23, p11, p13.b // 00100101-00100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.b +// CHECK-ENCODING: [0xb7,0xad,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2522adb7 + +lastp xzr, p15, p15.b // 00100101-00100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.b +// CHECK-ENCODING: [0xff,0xbd,0x22,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2522bdff + +lastp x0, p0, p0.h // 00100101-01100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.h +// CHECK-ENCODING: [0x00,0x80,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25628000 + +lastp x23, p11, p13.h // 00100101-01100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.h +// CHECK-ENCODING: [0xb7,0xad,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2562adb7 + +lastp xzr, p15, p15.h // 00100101-01100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.h +// CHECK-ENCODING: [0xff,0xbd,0x62,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 2562bdff + +lastp x0, p0, p0.s // 00100101-10100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.s +// CHECK-ENCODING: [0x00,0x80,0xa2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a28000 + +lastp x23, p11, p13.s // 00100101-10100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.s +// CHECK-ENCODING: [0xb7,0xad,0xa2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a2adb7 + +lastp xzr, p15, p15.s // 00100101-10100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.s +// CHECK-ENCODING: [0xff,0xbd,0xa2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25a2bdff + +lastp x0, p0, p0.d // 00100101-11100010-10000000-00000000 +// CHECK-INST: lastp x0, p0, p0.d +// CHECK-ENCODING: [0x00,0x80,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e28000 + +lastp x23, p11, p13.d // 00100101-11100010-10101101-10110111 +// CHECK-INST: lastp x23, p11, p13.d +// CHECK-ENCODING: [0xb7,0xad,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e2adb7 + +lastp xzr, p15, p15.d // 00100101-11100010-10111101-11111111 +// CHECK-INST: lastp xzr, p15, p15.d +// CHECK-ENCODING: [0xff,0xbd,0xe2,0x25] +// CHECK-ERROR: instruction requires: sme2p2 or sve2p2 +// CHECK-UNKNOWN: 25e2bdff \ No newline at end of file diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s new file mode 100644 index 00000000000000..96dd5720894361 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s @@ -0,0 +1,2554 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x64] + +v_add_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x64] + +v_add_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x64] + +v_add_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x64] + +v_add_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x64] + +v_add_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x64] + +v_add_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x64] + +v_add_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x64] + +v_add_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x64] + +v_add_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x64] + +v_add_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x64] + +v_add_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x64] + +v_add_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] + +v_add_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x06] + +v_add_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x06] + +v_add_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x06] + +v_add_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x06] + +v_add_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x06] + +v_add_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x06] + +v_add_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x06] + +v_add_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x06] + +v_add_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x06] + +v_add_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x06] + +v_add_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x06] + +v_add_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x06] + +v_add_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] + +v_add_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4a] + +v_add_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] + +v_and_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x36] + +v_and_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x36] + +v_and_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x36] + +v_and_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x36] + +v_and_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x36] + +v_and_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x36] + +v_and_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x36] + +v_and_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x36] + +v_and_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x36] + +v_and_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x36] + +v_and_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x36] + +v_and_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x36] + +v_and_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x34] + +v_ashrrev_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] + +v_cndmask_b32 v5, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_dot2acc_f32_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2acc_f32_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x04] + +v_dot2acc_f32_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x04] + +v_dot2acc_f32_f16 v255, 0xfe0b, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] + +v_dot2c_f32_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x04] + +v_dot2c_f32_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x04] + +v_dot2c_f32_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x04] + +v_dot2c_f32_f16 v255, 0xfe0b, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v1, v2, 0xfe0b +// GFX11: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v127, v2, 0xfe0b +// GFX11: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s1, v2, 0xfe0b +// GFX11: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s105, v2, 0xfe0b +// GFX11: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b +// GFX11: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b +// GFX11: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, ttmp15, v2, 0xfe0b +// GFX11: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, m0, v2, 0xfe0b +// GFX11: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_lo, v2, 0xfe0b +// GFX11: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_hi, v2, 0xfe0b +// GFX11: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, null, v2, 0xfe0b +// GFX11: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, -1, v2, 0xfe0b +// GFX11: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, 0.5, v2, 0xfe0b +// GFX11: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, src_scc, v2, 0xfe0b +// GFX11: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b +// GFX11: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f32 v5, v1, v2, 0xaf123456 +// GFX11: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, v255, v2, 0xaf123456 +// GFX11: encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s1, v2, 0xaf123456 +// GFX11: encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s105, v2, 0xaf123456 +// GFX11: encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 +// GFX11: encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 +// GFX11: encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 +// GFX11: encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, m0, v2, 0xaf123456 +// GFX11: encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 +// GFX11: encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 +// GFX11: encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, null, v2, 0xaf123456 +// GFX11: encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, -1, v2, 0xaf123456 +// GFX11: encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, 0.5, v2, 0xaf123456 +// GFX11: encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, src_scc, v2, 0xaf123456 +// GFX11: encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 +// GFX11: encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] + +v_fmac_dx9_zero_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0c] + +v_fmac_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] + +v_fmac_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x6c] + +v_fmac_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x6c] + +v_fmac_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x6c] + +v_fmac_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x6c] + +v_fmac_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x6c] + +v_fmac_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x6c] + +v_fmac_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x6c] + +v_fmac_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x6c] + +v_fmac_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] + +v_fmac_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x56] + +v_fmac_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x56] + +v_fmac_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x56] + +v_fmac_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x56] + +v_fmac_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x56] + +v_fmac_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x56] + +v_fmac_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x56] + +v_fmac_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x56] + +v_fmac_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x56] + +v_fmac_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x56] + +v_fmac_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] + +v_fmac_legacy_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0c] + +v_fmac_legacy_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0c] + +v_fmac_legacy_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0c] + +v_fmac_legacy_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] + +v_fmamk_f16 v5, v1, 0xfe0b, v3 +// GFX11: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, v127, 0xfe0b, v3 +// GFX11: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s1, 0xfe0b, v3 +// GFX11: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s105, 0xfe0b, v3 +// GFX11: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 +// GFX11: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 +// GFX11: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 +// GFX11: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, m0, 0xfe0b, v3 +// GFX11: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 +// GFX11: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 +// GFX11: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, null, 0xfe0b, v3 +// GFX11: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, -1, 0xfe0b, v3 +// GFX11: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, 0.5, 0xfe0b, v3 +// GFX11: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, src_scc, 0xfe0b, v3 +// GFX11: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f32 v5, v1, 0xaf123456, v3 +// GFX11: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, v255, 0xaf123456, v3 +// GFX11: encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s1, 0xaf123456, v3 +// GFX11: encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s105, 0xaf123456, v3 +// GFX11: encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 +// GFX11: encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 +// GFX11: encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 +// GFX11: encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, m0, 0xaf123456, v3 +// GFX11: encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 +// GFX11: encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 +// GFX11: encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, null, 0xaf123456, v3 +// GFX11: encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, -1, 0xaf123456, v3 +// GFX11: encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, 0.5, 0xaf123456, v3 +// GFX11: encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, src_scc, 0xaf123456, v3 +// GFX11: encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] + +v_ldexp_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x76] + +v_ldexp_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x76] + +v_ldexp_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x76] + +v_ldexp_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x76] + +v_ldexp_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x76] + +v_ldexp_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x76] + +v_ldexp_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x76] + +v_ldexp_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x76] + +v_ldexp_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x30] + +v_lshlrev_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x32] + +v_lshrrev_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] + +v_max_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x72] + +v_max_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x72] + +v_max_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x72] + +v_max_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x72] + +v_max_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x72] + +v_max_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x72] + +v_max_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x72] + +v_max_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x72] + +v_max_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x72] + +v_max_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x72] + +v_max_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x72] + +v_max_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x72] + +v_max_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x72] + +v_max_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x72] + +v_max_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] + +v_max_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x20] + +v_max_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x20] + +v_max_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x20] + +v_max_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x20] + +v_max_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x20] + +v_max_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x20] + +v_max_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x20] + +v_max_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x20] + +v_max_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x20] + +v_max_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x20] + +v_max_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x20] + +v_max_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x20] + +v_max_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x20] + +v_max_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x20] + +v_max_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] + +v_max_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x24] + +v_max_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x24] + +v_max_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x24] + +v_max_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x24] + +v_max_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x24] + +v_max_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x24] + +v_max_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x24] + +v_max_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x24] + +v_max_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x24] + +v_max_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x24] + +v_max_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x24] + +v_max_i32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x24] + +v_max_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] + +v_max_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x28] + +v_max_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x28] + +v_max_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x28] + +v_max_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x28] + +v_max_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x28] + +v_max_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x28] + +v_max_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x28] + +v_max_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x28] + +v_max_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x28] + +v_max_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x28] + +v_max_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x28] + +v_max_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x28] + +v_max_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] + +v_min_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x74] + +v_min_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x74] + +v_min_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x74] + +v_min_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x74] + +v_min_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x74] + +v_min_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x74] + +v_min_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x74] + +v_min_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x74] + +v_min_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x74] + +v_min_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x74] + +v_min_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x74] + +v_min_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x74] + +v_min_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x74] + +v_min_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x74] + +v_min_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] + +v_min_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x1e] + +v_min_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x1e] + +v_min_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x1e] + +v_min_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x1e] + +v_min_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x1e] + +v_min_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x1e] + +v_min_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x1e] + +v_min_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x1e] + +v_min_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x1e] + +v_min_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x1e] + +v_min_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x1e] + +v_min_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x1e] + +v_min_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x1e] + +v_min_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x1e] + +v_min_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] + +v_min_i32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x22] + +v_min_i32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x22] + +v_min_i32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x22] + +v_min_i32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x22] + +v_min_i32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x22] + +v_min_i32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x22] + +v_min_i32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x22] + +v_min_i32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x22] + +v_min_i32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x22] + +v_min_i32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x22] + +v_min_i32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x22] + +v_min_i32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x22] + +v_min_i32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] + +v_min_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x26] + +v_min_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x26] + +v_min_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x26] + +v_min_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x26] + +v_min_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x26] + +v_min_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x26] + +v_min_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x26] + +v_min_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x26] + +v_min_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x26] + +v_min_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x26] + +v_min_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x26] + +v_min_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x26] + +v_min_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x6a] + +v_mul_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x6a] + +v_mul_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x6a] + +v_mul_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x6a] + +v_mul_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x6a] + +v_mul_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x6a] + +v_mul_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x6a] + +v_mul_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x6a] + +v_mul_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x6a] + +v_mul_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x6a] + +v_mul_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] + +v_mul_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x10] + +v_mul_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x10] + +v_mul_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x10] + +v_mul_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x10] + +v_mul_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x10] + +v_mul_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x10] + +v_mul_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x10] + +v_mul_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x10] + +v_mul_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x10] + +v_mul_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x10] + +v_mul_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x12] + +v_mul_i32_i24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x16] + +v_mul_u32_u24 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] + +v_or_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x38] + +v_or_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x38] + +v_or_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x38] + +v_or_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x38] + +v_or_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x38] + +v_or_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x38] + +v_or_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x38] + +v_or_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x38] + +v_or_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x38] + +v_or_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x38] + +v_or_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x38] + +v_or_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x38] + +v_or_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] + +v_pk_fmac_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x78] + +v_pk_fmac_f16 v255, 0xfe0b, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x66] + +v_sub_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x66] + +v_sub_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x66] + +v_sub_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x66] + +v_sub_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x66] + +v_sub_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x66] + +v_sub_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x66] + +v_sub_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x66] + +v_sub_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x66] + +v_sub_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x66] + +v_sub_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] + +v_sub_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x08] + +v_sub_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x08] + +v_sub_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x08] + +v_sub_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x08] + +v_sub_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x08] + +v_sub_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x08] + +v_sub_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x08] + +v_sub_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x08] + +v_sub_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x08] + +v_sub_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x08] + +v_sub_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4c] + +v_sub_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x68] + +v_subrev_f16 v5, v127, v2 +// GFX11: encoding: [0x7f,0x05,0x0a,0x68] + +v_subrev_f16 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x68] + +v_subrev_f16 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x68] + +v_subrev_f16 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x68] + +v_subrev_f16 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x68] + +v_subrev_f16 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x68] + +v_subrev_f16 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x68] + +v_subrev_f16 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x68] + +v_subrev_f16 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x68] + +v_subrev_f16 v127, 0xfe0b, v127 +// GFX11: encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] + +v_subrev_f32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x0a] + +v_subrev_f32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x0a] + +v_subrev_f32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x0a] + +v_subrev_f32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x0a] + +v_subrev_f32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x0a] + +v_subrev_f32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x0a] + +v_subrev_f32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x0a] + +v_subrev_f32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x0a] + +v_subrev_f32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] + +v_xnor_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x3c] + +v_xnor_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x3c] + +v_xnor_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x3c] + +v_xnor_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x3c] + +v_xnor_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x3c] + +v_xnor_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x3c] + +v_xnor_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x3c] + +v_xnor_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x3c] + +v_xnor_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] + +v_xor_b32 v5, v1, v2 +// GFX11: encoding: [0x01,0x05,0x0a,0x3a] + +v_xor_b32 v5, v255, v2 +// GFX11: encoding: [0xff,0x05,0x0a,0x3a] + +v_xor_b32 v5, s1, v2 +// GFX11: encoding: [0x01,0x04,0x0a,0x3a] + +v_xor_b32 v5, s105, v2 +// GFX11: encoding: [0x69,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_lo, v2 +// GFX11: encoding: [0x6a,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_hi, v2 +// GFX11: encoding: [0x6b,0x04,0x0a,0x3a] + +v_xor_b32 v5, ttmp15, v2 +// GFX11: encoding: [0x7b,0x04,0x0a,0x3a] + +v_xor_b32 v5, m0, v2 +// GFX11: encoding: [0x7d,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_lo, v2 +// GFX11: encoding: [0x7e,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_hi, v2 +// GFX11: encoding: [0x7f,0x04,0x0a,0x3a] + +v_xor_b32 v5, null, v2 +// GFX11: encoding: [0x7c,0x04,0x0a,0x3a] + +v_xor_b32 v5, -1, v2 +// GFX11: encoding: [0xc1,0x04,0x0a,0x3a] + +v_xor_b32 v5, 0.5, v2 +// GFX11: encoding: [0xf0,0x04,0x0a,0x3a] + +v_xor_b32 v5, src_scc, v2 +// GFX11: encoding: [0xfd,0x04,0x0a,0x3a] + +v_xor_b32 v255, 0xaf123456, v255 +// GFX11: encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s index fb300b2e94972d..2a4b3ea2017013 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0x0a,0x40] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s new file mode 100644 index 00000000000000..6b9092f501e5af --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16-fake16.s @@ -0,0 +1,2114 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +v_add_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] + +v_add_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] + +v_add_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] + +v_add_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x09,0x13] + +v_add_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xf5,0x30] + +v_add_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] + +v_add_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] + +v_add_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] + +v_add_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] + +v_add_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x09,0x13] + +v_add_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xf5,0x30] + +v_add_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] + +v_add_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x09,0x13] + +v_add_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x05,0x30] + +v_and_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] + +v_and_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] + +v_and_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] + +v_and_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] + +v_and_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x09,0x13] + +v_and_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x05,0x30] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x09,0x13] + +v_ashrrev_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x05,0x30] + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -v1, |v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, |v1|, -v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 bound_ctrl:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_dot2acc_f32_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] + +v_dot2acc_f32_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] + +v_dot2acc_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x09,0x13] + +v_dot2acc_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] + +v_dot2c_f32_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] + +v_dot2c_f32_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] + +v_dot2c_f32_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x09,0x13] + +v_dot2c_f32_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xf5,0x30] + +v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] + +v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] + +v_fmac_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] + +v_fmac_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x09,0x13] + +v_fmac_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xf5,0x30] + +v_fmac_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] + +v_fmac_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] + +v_fmac_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] + +v_fmac_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x09,0x13] + +v_fmac_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xf5,0x30] + +v_ldexp_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] + +v_ldexp_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x09,0x13] + +v_ldexp_f16 v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x35,0x30] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x09,0x13] + +v_lshlrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x05,0x30] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x09,0x13] + +v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] + +v_max_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] + +v_max_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] + +v_max_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] + +v_max_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] + +v_max_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] + +v_max_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] + +v_max_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] + +v_max_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] + +v_max_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x09,0x13] + +v_max_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xf5,0x30] + +v_max_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] + +v_max_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] + +v_max_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] + +v_max_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] + +v_max_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] + +v_max_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] + +v_max_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] + +v_max_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] + +v_max_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x09,0x13] + +v_max_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xf5,0x30] + +v_max_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] + +v_max_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] + +v_max_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] + +v_max_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] + +v_max_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x09,0x13] + +v_max_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x05,0x30] + +v_max_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] + +v_max_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] + +v_max_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] + +v_max_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] + +v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x09,0x13] + +v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] + +v_min_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] + +v_min_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] + +v_min_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] + +v_min_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] + +v_min_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] + +v_min_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] + +v_min_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] + +v_min_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] + +v_min_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x09,0x13] + +v_min_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xf5,0x30] + +v_min_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] + +v_min_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] + +v_min_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] + +v_min_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] + +v_min_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] + +v_min_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] + +v_min_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] + +v_min_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] + +v_min_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x09,0x13] + +v_min_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xf5,0x30] + +v_min_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] + +v_min_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] + +v_min_i32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] + +v_min_i32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] + +v_min_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x09,0x13] + +v_min_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x05,0x30] + +v_min_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] + +v_min_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] + +v_min_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] + +v_min_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] + +v_min_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x09,0x13] + +v_min_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] + +v_mul_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] + +v_mul_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] + +v_mul_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x09,0x13] + +v_mul_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xf5,0x30] + +v_mul_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] + +v_mul_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] + +v_mul_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] + +v_mul_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x09,0x13] + +v_mul_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xf5,0x30] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x05,0x30] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x05,0x30] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x09,0x13] + +v_mul_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x05,0x30] + +v_mul_legacy_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_legacy_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_legacy_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_legacy_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_legacy_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x09,0x13] + +v_mul_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x05,0x30] + +v_or_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] + +v_or_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] + +v_or_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] + +v_or_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] + +v_or_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x09,0x13] + +v_or_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x05,0x30] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] + +v_sub_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] + +v_sub_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] + +v_sub_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x09,0x13] + +v_sub_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xf5,0x30] + +v_sub_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] + +v_sub_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] + +v_sub_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] + +v_sub_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x09,0x13] + +v_sub_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xf5,0x30] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x09,0x13] + +v_sub_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x05,0x30] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] + +v_subrev_f16 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] + +v_subrev_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x09,0x13] + +v_subrev_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xf5,0x30] + +v_subrev_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] + +v_subrev_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] + +v_subrev_f32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] + +v_subrev_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x09,0x13] + +v_subrev_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xf5,0x30] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x05,0x30] + +v_xnor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] + +v_xnor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] + +v_xnor_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] + +v_xnor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x09,0x13] + +v_xnor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x05,0x30] + +v_xor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] + +v_xor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] + +v_xor_b32 v5, v1, v2 row_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_half_mirror +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:15 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] + +v_xor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x09,0x13] + +v_xor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s index 62c0deaecd96a5..3eff00bb96e475 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s new file mode 100644 index 00000000000000..a4fea037a4de7d --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8-fake16.s @@ -0,0 +1,451 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_dot2acc_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2acc_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2acc_f32_f16 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2c_f32_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] + +v_dot2c_f32_f16 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] + +v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] + +v_max_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] + +v_max_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] + +v_max_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] + +v_max_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] + +v_max_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] + +v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] + +v_min_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] + +v_min_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] + +v_min_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] + +v_min_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] + +v_mul_legacy_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: encoding: [0xea,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: encoding: [0xe9,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s index d235fcdeb526aa..0f19cf0028525f 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX11,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s new file mode 100644 index 00000000000000..2d52828d1e2834 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err-fake16.s @@ -0,0 +1,13 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s + +v_fmaak_f32 v0, 0xff32, v0, 0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmaak_f16 v0, 0xff32, v0, 0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmamk_f32 v0, 0xff32, 1, v0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed + +v_fmamk_f16 v0, 0xff32, 1, v0 +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s index 164a49dcdd47b6..dedbcb55d7976d 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_err.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s v_fmaak_f32 v0, 0xff32, v0, 0 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: only one unique literal operand is allowed diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s index 76b1c38fad43d9..dd619f3077f704 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_err.s @@ -1,237 +1,238 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s -v_add_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v255, v1, v2, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v255.l, v1.l, v2.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v255, v1, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmaak_f16_e32 v5, v255, v2, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_fmac_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5.l, v255.l, v2.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v255, v2 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v5, v1, v255, 0xfe0b -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5.l, v1.l, v255.l -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_min_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_mul_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_sub_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_subrev_f16_e32 v5, v1, v255 -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v255.l, v1.l, v2.l +// GFX11: :[[@LINE-1]]:17: error: invalid operand for instruction -v_ldexp_f16_dpp v255.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_ldexp_f16_e32 v5.l, v1.l, v255.l +// GFX11: :[[@LINE-1]]:29: error: invalid operand for instruction -v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v5.l, v255.l, v2.l +// GFX11: :[[@LINE-1]]:23: error: invalid operand for instruction -v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v255.l, v2.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_max_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v1.l, v255.l quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_mul_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v255.l, v2.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5.l, v1.l, v255.l dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction -v_max_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v255, v1, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v1, v255 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v255, v2 +// GFX11: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s index a5b5f32e976226..a6dcce40fd0e03 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2_t16_promote.s @@ -1,201 +1,202 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=_e32 %s v_add_f16 v255, v1, v2 -// GFX11: v_add_f16_e64 +// GFX11: v_add_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v255, v1, v2 -// GFX11: v_fmac_f16_e64 - -v_ldexp_f16 v255, v1, v2 -// GFX11: v_ldexp_f16_e64 - -v_max_f16 v255, v1, v2 -// GFX11: v_max_f16_e64 +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v255, v1, v2 -// GFX11: v_min_f16_e64 +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 -// GFX11: v_mul_f16_e64 +v_add_f16 v5, v1, v255 +// GFX11: v_add_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v255, v1, v2 -// GFX11: v_sub_f16_e64 +v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 -// GFX11: v_subrev_f16_e64 +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] v_add_f16 v5, v255, v2 -// GFX11: v_add_f16_e64 - -v_fmac_f16 v5, v255, v2 -// GFX11: v_fmac_f16_e64 +// GFX11: v_add_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 -// GFX11: v_ldexp_f16_e64 +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_max_f16 v5, v255, v2 -// GFX11: v_max_f16_e64 +v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_add_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 -// GFX11: v_min_f16_e64 +v_fmac_f16 v255, v1, v2 +// GFX11: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] -v_mul_f16 v5, v255, v2 -// GFX11: v_mul_f16_e64 +v_fmac_f16 v5, v1, v255 +// GFX11: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v5, v255, v2 -// GFX11: v_sub_f16_e64 +v_fmac_f16 v5, v255, v2 +// GFX11: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v5, v255, v2 -// GFX11: v_subrev_f16_e64 +v_ldexp_f16 v255, v1, v2 +// GFX11: v_ldexp_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -v_add_f16 v5, v1, v255 -// GFX11: v_add_f16_e64 +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_fmac_f16 v5, v1, v255 -// GFX11: v_fmac_f16_e64 +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_ldexp_f16 v5, v1, v255 -// GFX11: v_ldexp_f16_e64 +// GFX11: v_ldexp_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x3b,0xd5,0x01,0xff,0x03,0x00] -v_max_f16 v5, v1, v255 -// GFX11: v_max_f16_e64 +v_ldexp_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v5, v1, v255 -// GFX11: v_min_f16_e64 +v_ldexp_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v1, v255 -// GFX11: v_mul_f16_e64 +v_ldexp_f16 v5, v255, v2 +// GFX11: v_ldexp_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v1, v255 -// GFX11: v_sub_f16_e64 +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v1, v255 -// GFX11: v_subrev_f16_e64 +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_ldexp_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_max_f16 v255, v1, v2 +// GFX11: v_max_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x39,0xd5,0x01,0x05,0x02,0x00] -v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] v_max_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64 - -v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +// GFX11: v_max_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_max_f16 v5, v1, v255 +// GFX11: v_max_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x39,0xd5,0x01,0xff,0x03,0x00] -v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_max_f16 v5, v255, v2 +// GFX11: v_max_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x39,0xd5,0xff,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_max_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] v_max_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64 +// GFX11: v_max_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +v_min_f16 v255, v1, v2 +// GFX11: v_min_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3a,0xd5,0x01,0x05,0x02,0x00] -v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_min_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_min_f16 v5, v1, v255 +// GFX11: v_min_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x3a,0xd5,0x01,0xff,0x03,0x00] -v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_add_f16_e64 +v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_ldexp_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_max_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_max_f16_e64 +v_min_f16 v5, v255, v2 +// GFX11: v_min_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3a,0xd5,0xff,0x05,0x02,0x00] -v_min_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_min_f16_e64 +v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3a,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_mul_f16_e64 +v_min_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_min_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_sub_f16_e64 +v_mul_f16 v255, v1, v2 +// GFX11: v_mul_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_mul_f16 v5, v1, v255 +// GFX11: v_mul_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0xff,0x03,0x00] -v_max_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_mul_f16 v5, v255, v2 +// GFX11: v_mul_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_mul_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_sub_f16 v255, v1, v2 +// GFX11: v_sub_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_max_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_sub_f16 v5, v1, v255 +// GFX11: v_sub_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0xff,0x03,0x00] -v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +v_sub_f16 v5, v255, v2 +// GFX11: v_sub_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0x05,0x02,0x00] -v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_add_f16_e64 +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_ldexp_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_ldexp_f16_e64 +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_sub_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_max_f16_e64 +v_subrev_f16 v255, v1, v2 +// GFX11: v_subrev_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -v_min_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_min_f16_e64 +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_mul_f16_e64 +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_sub_f16_e64 +v_subrev_f16 v5, v1, v255 +// GFX11: v_subrev_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0xff,0x03,0x00] v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_subrev_f16_e64 +// GFX11: v_subrev_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v255, v2 +// GFX11: v_subrev_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0x05,0x02,0x00] + +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] + +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX11: v_subrev_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s index 196d75db426052..8bf9b92e8d1d8d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vimage.s @@ -158,6 +158,12 @@ image_load v[0:2], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY th:TH_LO image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] +image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -408,6 +414,12 @@ image_store v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_STORE_BYPASS scope image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] @@ -568,6 +580,12 @@ image_atomic_swap v[254:255], [v4, v5], s[96:103] dmask:0x3 dim:SQ_RSRC_IMG_2D_M image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] @@ -625,6 +643,12 @@ image_atomic_add_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_N image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D // GFX12: encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] +image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D +// GFX12: encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] + +image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY +// GFX12: encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] + image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D // GFX12: encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s new file mode 100644 index 00000000000000..4c37502e1b247f --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s @@ -0,0 +1,2560 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x40] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x40] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x64] + +v_add_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x64] + +v_add_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x64] + +v_add_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x64] + +v_add_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x64] + +v_add_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x64] + +v_add_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x64] + +v_add_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x64] + +v_add_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x64] + +v_add_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x64] + +v_add_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x64] + +v_add_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x64] + +v_add_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x64] + +v_add_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] + +v_add_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x06] + +v_add_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x06] + +v_add_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x06] + +v_add_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x06] + +v_add_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x06] + +v_add_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x06] + +v_add_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x06] + +v_add_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x06] + +v_add_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x06] + +v_add_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x06] + +v_add_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x06] + +v_add_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x06] + +v_add_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x06] + +v_add_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] + +v_add_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x04] + +v_add_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x04] + +v_add_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x04] + +v_add_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x04] + +v_add_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x04] + +v_add_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x04] + +v_add_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x04] + +v_add_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x04] + +v_add_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x04] + +v_add_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x04] + +v_add_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x04] + +v_add_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] + +v_add_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4a] + +v_add_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4a] + +v_add_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4a] + +v_add_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] + +v_and_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x36] + +v_and_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x36] + +v_and_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x36] + +v_and_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x36] + +v_and_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x36] + +v_and_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x36] + +v_and_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x36] + +v_and_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x36] + +v_and_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x36] + +v_and_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x36] + +v_and_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x36] + +v_and_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x36] + +v_and_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x36] + +v_and_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] + +v_ashrrev_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x34] + +v_ashrrev_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x34] + +v_ashrrev_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x34] + +v_ashrrev_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] + +v_cndmask_b32 v5, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x02] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x02] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pk_rtz_f16_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5e] + +v_cvt_pkrtz_f16_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] + +v_fmaak_f16 v5, v1, v2, 0xfe0b +// GFX12: encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, v127, v2, 0xfe0b +// GFX12: encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s1, v2, 0xfe0b +// GFX12: encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, s105, v2, 0xfe0b +// GFX12: encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b +// GFX12: encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b +// GFX12: encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, ttmp15, v2, 0xfe0b +// GFX12: encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, m0, v2, 0xfe0b +// GFX12: encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_lo, v2, 0xfe0b +// GFX12: encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, exec_hi, v2, 0xfe0b +// GFX12: encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, null, v2, 0xfe0b +// GFX12: encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, -1, v2, 0xfe0b +// GFX12: encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, 0.5, v2, 0xfe0b +// GFX12: encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v5, src_scc, v2, 0xfe0b +// GFX12: encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b +// GFX12: encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] + +v_fmaak_f32 v5, v1, v2, 0xaf123456 +// GFX12: encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, v255, v2, 0xaf123456 +// GFX12: encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s1, v2, 0xaf123456 +// GFX12: encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, s105, v2, 0xaf123456 +// GFX12: encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 +// GFX12: encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 +// GFX12: encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 +// GFX12: encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, m0, v2, 0xaf123456 +// GFX12: encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 +// GFX12: encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 +// GFX12: encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, null, v2, 0xaf123456 +// GFX12: encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, -1, v2, 0xaf123456 +// GFX12: encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, 0.5, v2, 0xaf123456 +// GFX12: encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v5, src_scc, v2, 0xaf123456 +// GFX12: encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] + +v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 +// GFX12: encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] + +v_fmac_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x6c] + +v_fmac_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x6c] + +v_fmac_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x6c] + +v_fmac_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x6c] + +v_fmac_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x6c] + +v_fmac_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x6c] + +v_fmac_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x6c] + +v_fmac_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x6c] + +v_fmac_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x6c] + +v_fmac_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x6c] + +v_fmac_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x6c] + +v_fmac_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] + +v_fmac_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x56] + +v_fmac_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x56] + +v_fmac_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x56] + +v_fmac_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x56] + +v_fmac_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x56] + +v_fmac_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x56] + +v_fmac_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x56] + +v_fmac_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x56] + +v_fmac_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x56] + +v_fmac_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x56] + +v_fmac_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x56] + +v_fmac_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x56] + +v_fmac_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] + +v_fmamk_f16 v5, v1, 0xfe0b, v3 +// GFX12: encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, v127, 0xfe0b, v3 +// GFX12: encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s1, 0xfe0b, v3 +// GFX12: encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, s105, 0xfe0b, v3 +// GFX12: encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 +// GFX12: encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 +// GFX12: encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 +// GFX12: encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, m0, 0xfe0b, v3 +// GFX12: encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 +// GFX12: encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 +// GFX12: encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, null, 0xfe0b, v3 +// GFX12: encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, -1, 0xfe0b, v3 +// GFX12: encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, 0.5, 0xfe0b, v3 +// GFX12: encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v5, src_scc, 0xfe0b, v3 +// GFX12: encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] + +v_fmamk_f32 v5, v1, 0xaf123456, v3 +// GFX12: encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, v255, 0xaf123456, v3 +// GFX12: encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s1, 0xaf123456, v3 +// GFX12: encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, s105, 0xaf123456, v3 +// GFX12: encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 +// GFX12: encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 +// GFX12: encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 +// GFX12: encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, m0, 0xaf123456, v3 +// GFX12: encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 +// GFX12: encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 +// GFX12: encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, null, 0xaf123456, v3 +// GFX12: encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, -1, 0xaf123456, v3 +// GFX12: encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, 0.5, 0xaf123456, v3 +// GFX12: encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v5, src_scc, 0xaf123456, v3 +// GFX12: encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] + +v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] + +v_ldexp_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x76] + +v_ldexp_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x76] + +v_ldexp_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x76] + +v_ldexp_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x76] + +v_ldexp_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x76] + +v_ldexp_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x76] + +v_ldexp_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x76] + +v_ldexp_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x76] + +v_ldexp_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x76] + +v_ldexp_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x76] + +v_ldexp_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x76] + +v_ldexp_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x30] + +v_lshlrev_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x30] + +v_lshlrev_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x30] + +v_lshlrev_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] + +v_lshlrev_b64 v[5:6], v1, v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], v255, v[2:3] +// GFX12: encoding: [0xff,0x05,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], s1, v[2:3] +// GFX12: encoding: [0x01,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], s105, v[2:3] +// GFX12: encoding: [0x69,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], vcc_lo, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], vcc_hi, v[2:3] +// GFX12: encoding: [0x6b,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], ttmp15, v[2:3] +// GFX12: encoding: [0x7b,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], exec_lo, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], exec_hi, v[2:3] +// GFX12: encoding: [0x7f,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x3e] + +v_lshlrev_b64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] + +v_lshrrev_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x32] + +v_lshrrev_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x32] + +v_lshrrev_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x32] + +v_lshrrev_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] + +v_max_num_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x62] + +v_max_num_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x62] + +v_max_num_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x62] + +v_max_num_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x62] + +v_max_num_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x62] + +v_max_num_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x62] + +v_max_num_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x62] + +v_max_num_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x62] + +v_max_num_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x62] + +v_max_num_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x62] + +v_max_num_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x62] + +v_max_num_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x62] + +v_max_num_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x62] + +v_max_num_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x62] + +v_max_num_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] + +v_max_num_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x2c] + +v_max_num_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x2c] + +v_max_num_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x2c] + +v_max_num_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x2c] + +v_max_num_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x2c] + +v_max_num_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x2c] + +v_max_num_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x2c] + +v_max_num_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x2c] + +v_max_num_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x2c] + +v_max_num_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x2c] + +v_max_num_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x2c] + +v_max_num_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x2c] + +v_max_num_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x2c] + +v_max_num_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x2c] + +v_max_num_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] + +v_max_num_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x1c] + +v_max_num_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x1c] + +v_max_num_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x1c] + +v_max_num_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x1c] + +v_max_num_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] + +v_max_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x24] + +v_max_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x24] + +v_max_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x24] + +v_max_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x24] + +v_max_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x24] + +v_max_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x24] + +v_max_i32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x24] + +v_max_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x24] + +v_max_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x24] + +v_max_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x24] + +v_max_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x24] + +v_max_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x24] + +v_max_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x24] + +v_max_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] + +v_max_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x28] + +v_max_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x28] + +v_max_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x28] + +v_max_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x28] + +v_max_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x28] + +v_max_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x28] + +v_max_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x28] + +v_max_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x28] + +v_max_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x28] + +v_max_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x28] + +v_max_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x28] + +v_max_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x28] + +v_max_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x28] + +v_max_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] + +v_min_num_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x60] + +v_min_num_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x60] + +v_min_num_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x60] + +v_min_num_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x60] + +v_min_num_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x60] + +v_min_num_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x60] + +v_min_num_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x60] + +v_min_num_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x60] + +v_min_num_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x60] + +v_min_num_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x60] + +v_min_num_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x60] + +v_min_num_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x60] + +v_min_num_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x60] + +v_min_num_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x60] + +v_min_num_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] + +v_min_num_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x2a] + +v_min_num_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x2a] + +v_min_num_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x2a] + +v_min_num_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x2a] + +v_min_num_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x2a] + +v_min_num_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x2a] + +v_min_num_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x2a] + +v_min_num_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x2a] + +v_min_num_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x2a] + +v_min_num_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x2a] + +v_min_num_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x2a] + +v_min_num_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x2a] + +v_min_num_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x2a] + +v_min_num_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x2a] + +v_min_num_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] + +v_min_num_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x1a] + +v_min_num_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x1a] + +v_min_num_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x1a] + +v_min_num_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x1a] + +v_min_num_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] + +v_min_i32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x22] + +v_min_i32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x22] + +v_min_i32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x22] + +v_min_i32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x22] + +v_min_i32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x22] + +v_min_i32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x22] + +v_min_i32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x22] + +v_min_i32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x22] + +v_min_i32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x22] + +v_min_i32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x22] + +v_min_i32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x22] + +v_min_i32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x22] + +v_min_i32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x22] + +v_min_i32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] + +v_min_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x26] + +v_min_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x26] + +v_min_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x26] + +v_min_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x26] + +v_min_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x26] + +v_min_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x26] + +v_min_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x26] + +v_min_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x26] + +v_min_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x26] + +v_min_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x26] + +v_min_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x26] + +v_min_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x26] + +v_min_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x26] + +v_min_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] + +v_mul_dx9_zero_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_dx9_zero_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x6a] + +v_mul_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x6a] + +v_mul_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x6a] + +v_mul_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x6a] + +v_mul_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x6a] + +v_mul_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x6a] + +v_mul_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x6a] + +v_mul_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x6a] + +v_mul_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x6a] + +v_mul_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x6a] + +v_mul_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x6a] + +v_mul_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x6a] + +v_mul_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] + +v_mul_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x10] + +v_mul_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x10] + +v_mul_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x10] + +v_mul_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x10] + +v_mul_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x10] + +v_mul_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x10] + +v_mul_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x10] + +v_mul_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x10] + +v_mul_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x10] + +v_mul_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x10] + +v_mul_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x10] + +v_mul_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x10] + +v_mul_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] + +v_mul_f64 v[5:6], v[1:2], v[3:4] +// GFX12: encoding: [0x01,0x07,0x0a,0x0c] + +v_mul_f64 v[5:6], v[254:255], v[2:3] +// GFX12: encoding: [0xfe,0x05,0x0a,0x0c] + +v_mul_f64 v[5:6], s[0:1], v[2:3] +// GFX12: encoding: [0x00,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], s[104:105], v[2:3] +// GFX12: encoding: [0x68,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], vcc, v[2:3] +// GFX12: encoding: [0x6a,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], ttmp[14:15], v[2:3] +// GFX12: encoding: [0x7a,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], exec, v[2:3] +// GFX12: encoding: [0x7e,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], null, v[2:3] +// GFX12: encoding: [0x7c,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], -1, v[2:3] +// GFX12: encoding: [0xc1,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], 0.5, v[2:3] +// GFX12: encoding: [0xf0,0x04,0x0a,0x0c] + +v_mul_f64 v[5:6], src_scc, v[2:3] +// GFX12: encoding: [0xfd,0x04,0x0a,0x0c] + +v_mul_f64 v[254:255], 0xaf123456, v[254:255] +// GFX12: encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] + +v_mul_hi_i32_i24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x14] + +v_mul_hi_i32_i24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] + +v_mul_hi_u32_u24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x18] + +v_mul_hi_u32_u24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] + +v_mul_i32_i24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x12] + +v_mul_i32_i24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x12] + +v_mul_i32_i24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x12] + +v_mul_i32_i24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] + +v_mul_legacy_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0e] + +v_mul_legacy_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0e] + +v_mul_legacy_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] + +v_mul_u32_u24 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x16] + +v_mul_u32_u24 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x16] + +v_mul_u32_u24 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x16] + +v_mul_u32_u24 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] + +v_or_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x38] + +v_or_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x38] + +v_or_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x38] + +v_or_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x38] + +v_or_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x38] + +v_or_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x38] + +v_or_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x38] + +v_or_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x38] + +v_or_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x38] + +v_or_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x38] + +v_or_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x38] + +v_or_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x38] + +v_or_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x38] + +v_or_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] + +v_pk_fmac_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x78] + +v_pk_fmac_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x78] + +v_pk_fmac_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x78] + +v_pk_fmac_f16 v255, 0xfe0b, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x42] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x42] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x66] + +v_sub_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x66] + +v_sub_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x66] + +v_sub_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x66] + +v_sub_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x66] + +v_sub_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x66] + +v_sub_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x66] + +v_sub_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x66] + +v_sub_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x66] + +v_sub_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x66] + +v_sub_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x66] + +v_sub_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x66] + +v_sub_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] + +v_sub_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x08] + +v_sub_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x08] + +v_sub_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x08] + +v_sub_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x08] + +v_sub_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x08] + +v_sub_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x08] + +v_sub_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x08] + +v_sub_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x08] + +v_sub_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x08] + +v_sub_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x08] + +v_sub_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x08] + +v_sub_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x08] + +v_sub_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] + +v_sub_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4c] + +v_sub_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4c] + +v_sub_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4c] + +v_sub_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo +// W32: encoding: [0x01,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v255, v2, vcc_lo +// W32: encoding: [0xff,0x05,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s1, v2, vcc_lo +// W32: encoding: [0x01,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, s105, v2, vcc_lo +// W32: encoding: [0x69,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_lo, v2, vcc_lo +// W32: encoding: [0x6a,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, vcc_hi, v2, vcc_lo +// W32: encoding: [0x6b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, ttmp15, v2, vcc_lo +// W32: encoding: [0x7b,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, m0, v2, vcc_lo +// W32: encoding: [0x7d,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_lo, v2, vcc_lo +// W32: encoding: [0x7e,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, exec_hi, v2, vcc_lo +// W32: encoding: [0x7f,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, null, v2, vcc_lo +// W32: encoding: [0x7c,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, -1, v2, vcc_lo +// W32: encoding: [0xc1,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, 0.5, v2, vcc_lo +// W32: encoding: [0xf0,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, src_scc, v2, vcc_lo +// W32: encoding: [0xfd,0x04,0x0a,0x44] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, 0xaf123456, v255, vcc_lo +// W32: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc +// W64: encoding: [0x01,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v255, v2, vcc +// W64: encoding: [0xff,0x05,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s1, v2, vcc +// W64: encoding: [0x01,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, s105, v2, vcc +// W64: encoding: [0x69,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_lo, v2, vcc +// W64: encoding: [0x6a,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, vcc_hi, v2, vcc +// W64: encoding: [0x6b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, ttmp15, v2, vcc +// W64: encoding: [0x7b,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, m0, v2, vcc +// W64: encoding: [0x7d,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_lo, v2, vcc +// W64: encoding: [0x7e,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, exec_hi, v2, vcc +// W64: encoding: [0x7f,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, null, v2, vcc +// W64: encoding: [0x7c,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, -1, v2, vcc +// W64: encoding: [0xc1,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, 0.5, v2, vcc +// W64: encoding: [0xf0,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, src_scc, v2, vcc +// W64: encoding: [0xfd,0x04,0x0a,0x44] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, 0xaf123456, v255, vcc +// W64: encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x68] + +v_subrev_f16 v5, v127, v2 +// GFX12: encoding: [0x7f,0x05,0x0a,0x68] + +v_subrev_f16 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x68] + +v_subrev_f16 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x68] + +v_subrev_f16 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x68] + +v_subrev_f16 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x68] + +v_subrev_f16 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x68] + +v_subrev_f16 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x68] + +v_subrev_f16 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x68] + +v_subrev_f16 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x68] + +v_subrev_f16 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x68] + +v_subrev_f16 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x68] + +v_subrev_f16 v127, 0xfe0b, v127 +// GFX12: encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] + +v_subrev_f32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x0a] + +v_subrev_f32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x0a] + +v_subrev_f32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x0a] + +v_subrev_f32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x0a] + +v_subrev_f32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x0a] + +v_subrev_f32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x0a] + +v_subrev_f32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x0a] + +v_subrev_f32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x0a] + +v_subrev_f32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x0a] + +v_subrev_f32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x0a] + +v_subrev_f32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x0a] + +v_subrev_f32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] + +v_subrev_nc_u32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x4e] + +v_subrev_nc_u32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x4e] + +v_subrev_nc_u32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] + +v_xnor_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x3c] + +v_xnor_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x3c] + +v_xnor_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x3c] + +v_xnor_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x3c] + +v_xnor_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x3c] + +v_xnor_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x3c] + +v_xnor_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x3c] + +v_xnor_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x3c] + +v_xnor_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x3c] + +v_xnor_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x3c] + +v_xnor_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x3c] + +v_xnor_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] + +v_xor_b32 v5, v1, v2 +// GFX12: encoding: [0x01,0x05,0x0a,0x3a] + +v_xor_b32 v5, v255, v2 +// GFX12: encoding: [0xff,0x05,0x0a,0x3a] + +v_xor_b32 v5, s1, v2 +// GFX12: encoding: [0x01,0x04,0x0a,0x3a] + +v_xor_b32 v5, s105, v2 +// GFX12: encoding: [0x69,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_lo, v2 +// GFX12: encoding: [0x6a,0x04,0x0a,0x3a] + +v_xor_b32 v5, vcc_hi, v2 +// GFX12: encoding: [0x6b,0x04,0x0a,0x3a] + +v_xor_b32 v5, ttmp15, v2 +// GFX12: encoding: [0x7b,0x04,0x0a,0x3a] + +v_xor_b32 v5, m0, v2 +// GFX12: encoding: [0x7d,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_lo, v2 +// GFX12: encoding: [0x7e,0x04,0x0a,0x3a] + +v_xor_b32 v5, exec_hi, v2 +// GFX12: encoding: [0x7f,0x04,0x0a,0x3a] + +v_xor_b32 v5, null, v2 +// GFX12: encoding: [0x7c,0x04,0x0a,0x3a] + +v_xor_b32 v5, -1, v2 +// GFX12: encoding: [0xc1,0x04,0x0a,0x3a] + +v_xor_b32 v5, 0.5, v2 +// GFX12: encoding: [0xf0,0x04,0x0a,0x3a] + +v_xor_b32 v5, src_scc, v2 +// GFX12: encoding: [0xfd,0x04,0x0a,0x3a] + +v_xor_b32 v255, 0xaf123456, v255 +// GFX12: encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s index 08d4be08813192..5593ea77d9424e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo // W32: encoding: [0x01,0x05,0x0a,0x40] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s new file mode 100644 index 00000000000000..ebab0859b34841 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases-fake16.s @@ -0,0 +1,19 @@ +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s + +v_min_f32 v5, v1, v2 +// GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] + +v_max_f32 v5, v1, v2 +// GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] + +v_min_f16 v5, v1, v2 +// GFX12: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] + +v_max_f16 v5, v1, v2 +// GFX12: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] + +v_max_f64 v[5:6], v[1:2], v[2:3] +// GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[2:3] ; encoding: [0x01,0x05,0x0a,0x1c] + +v_min_f64 v[5:6], v[1:2], v[2:3] +// GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[2:3] ; encoding: [0x01,0x05,0x0a,0x1a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s index 3918dd48cfc063..b7e51cf2706478 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_aliases.s @@ -1,4 +1,4 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12 %s v_min_f32 v5, v1, v2 // GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s new file mode 100644 index 00000000000000..53373d1f469732 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16-fake16.s @@ -0,0 +1,2006 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] + +v_add_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] + +v_add_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] + +v_add_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] + +v_add_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] + +v_add_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] + +v_add_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] + +v_add_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x09,0x13] + +v_add_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xf5,0x30] + +v_add_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] + +v_add_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] + +v_add_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] + +v_add_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] + +v_add_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] + +v_add_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] + +v_add_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] + +v_add_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x09,0x13] + +v_add_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xf5,0x30] + +v_add_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] + +v_add_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] + +v_add_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] + +v_add_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x09,0x13] + +v_add_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x05,0x30] + +v_and_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] + +v_and_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] + +v_and_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] + +v_and_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] + +v_and_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] + +v_and_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] + +v_and_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] + +v_and_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x09,0x13] + +v_and_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x05,0x30] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] + +v_ashrrev_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] + +v_ashrrev_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] + +v_ashrrev_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x09,0x13] + +v_ashrrev_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x05,0x30] + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pk_rtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x09,0x13] + +v_cvt_pkrtz_f16_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xf5,0x30] + +v_fmac_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] + +v_fmac_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] + +v_fmac_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] + +v_fmac_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] + +v_fmac_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x09,0x13] + +v_fmac_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xf5,0x30] + +v_fmac_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] + +v_fmac_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] + +v_fmac_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] + +v_fmac_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] + +v_fmac_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x09,0x13] + +v_fmac_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xf5,0x30] + +v_ldexp_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] + +v_ldexp_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] + +v_ldexp_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] + +v_ldexp_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x09,0x13] + +v_ldexp_f16 v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x35,0x30] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] + +v_lshlrev_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] + +v_lshlrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] + +v_lshlrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x09,0x13] + +v_lshlrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x05,0x30] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] + +v_lshrrev_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] + +v_lshrrev_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] + +v_lshrrev_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x09,0x13] + +v_lshrrev_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x05,0x30] + +v_max_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] + +v_max_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] + +v_max_num_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] + +v_max_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] + +v_max_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x09,0x13] + +v_max_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xf5,0x30] + +v_max_num_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] + +v_max_num_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] + +v_max_num_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] + +v_max_num_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] + +v_max_num_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x09,0x13] + +v_max_num_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xf5,0x30] + +v_max_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] + +v_max_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] + +v_max_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] + +v_max_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] + +v_max_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] + +v_max_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] + +v_max_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] + +v_max_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x09,0x13] + +v_max_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x05,0x30] + +v_max_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] + +v_max_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] + +v_max_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] + +v_max_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] + +v_max_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] + +v_max_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] + +v_max_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] + +v_max_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x09,0x13] + +v_max_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x05,0x30] + +v_min_num_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] + +v_min_num_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] + +v_min_num_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] + +v_min_num_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] + +v_min_num_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x09,0x13] + +v_min_num_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xf5,0x30] + +v_min_num_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] + +v_min_num_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] + +v_min_num_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] + +v_min_num_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] + +v_min_num_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x09,0x13] + +v_min_num_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xf5,0x30] + +v_min_i32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] + +v_min_i32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] + +v_min_i32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] + +v_min_i32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] + +v_min_i32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] + +v_min_i32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] + +v_min_i32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] + +v_min_i32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x09,0x13] + +v_min_i32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x05,0x30] + +v_min_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] + +v_min_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] + +v_min_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] + +v_min_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] + +v_min_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] + +v_min_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] + +v_min_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] + +v_min_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x09,0x13] + +v_min_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] + +v_mul_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] + +v_mul_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] + +v_mul_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] + +v_mul_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x09,0x13] + +v_mul_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xf5,0x30] + +v_mul_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] + +v_mul_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] + +v_mul_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] + +v_mul_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] + +v_mul_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x09,0x13] + +v_mul_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xf5,0x30] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] + +v_mul_hi_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] + +v_mul_hi_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x09,0x13] + +v_mul_hi_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x05,0x30] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] + +v_mul_hi_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] + +v_mul_hi_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x09,0x13] + +v_mul_hi_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x05,0x30] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] + +v_mul_i32_i24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] + +v_mul_i32_i24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] + +v_mul_i32_i24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x09,0x13] + +v_mul_i32_i24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x05,0x30] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] + +v_mul_dx9_zero_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] + +v_mul_dx9_zero_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x09,0x13] + +v_mul_dx9_zero_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xf5,0x30] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] + +v_mul_u32_u24 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] + +v_mul_u32_u24 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] + +v_mul_u32_u24 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x09,0x13] + +v_mul_u32_u24 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x05,0x30] + +v_or_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] + +v_or_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] + +v_or_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] + +v_or_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] + +v_or_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] + +v_or_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] + +v_or_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] + +v_or_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x09,0x13] + +v_or_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x05,0x30] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] + +v_sub_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] + +v_sub_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] + +v_sub_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] + +v_sub_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x09,0x13] + +v_sub_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xf5,0x30] + +v_sub_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] + +v_sub_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] + +v_sub_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] + +v_sub_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] + +v_sub_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x09,0x13] + +v_sub_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xf5,0x30] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] + +v_sub_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] + +v_sub_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] + +v_sub_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x09,0x13] + +v_sub_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x05,0x30] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_half_mirror +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shl:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_shr:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_ror:15 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W32: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W32: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_half_mirror +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shl:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_shr:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_ror:15 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// W64: encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x09,0x13] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// W64: encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x05,0x30] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] + +v_subrev_f16 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] + +v_subrev_f16 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] + +v_subrev_f16 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] + +v_subrev_f16 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x09,0x13] + +v_subrev_f16 v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xf5,0x30] + +v_subrev_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] + +v_subrev_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] + +v_subrev_f32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] + +v_subrev_f32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] + +v_subrev_f32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x09,0x13] + +v_subrev_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xf5,0x30] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] + +v_subrev_nc_u32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] + +v_subrev_nc_u32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x09,0x13] + +v_subrev_nc_u32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x05,0x30] + +v_xnor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] + +v_xnor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] + +v_xnor_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] + +v_xnor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] + +v_xnor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x09,0x13] + +v_xnor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x05,0x30] + +v_xor_b32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] + +v_xor_b32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] + +v_xor_b32 v5, v1, v2 row_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_half_mirror +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shl:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_shr:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_ror:15 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] + +v_xor_b32 v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] + +v_xor_b32 v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x09,0x13] + +v_xor_b32 v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x05,0x30] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s index 63ffdbe821af8e..a0f93f459f915a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp16.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] // W32: encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s new file mode 100644 index 00000000000000..a7a035f4a9efc0 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8-fake16.s @@ -0,0 +1,433 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s + +v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] + +v_add_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] + +v_add_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] + +v_add_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] + +v_and_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] + +v_ashrrev_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cndmask_b32 v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] + +v_fmac_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] + +v_fmac_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] + +v_ldexp_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] + +v_lshlrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] + +v_lshrrev_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] + +v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] + +v_max_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] + +v_max_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] + +v_max_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] + +v_max_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] + +v_max_num_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] + +v_max_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] + +v_max_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] + +v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] + +v_min_num_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] + +v_min_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] + +v_min_num_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] + +v_min_num_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] + +v_min_i32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] + +v_min_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] + +v_mul_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] + +v_mul_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] + +v_mul_i32_i24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] + +v_mul_u32_u24 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] + +v_or_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] + +v_sub_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] + +v_sub_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] + +v_sub_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// W32: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W32: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W32: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] +// W64: encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: encoding: [0xea,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_co_ci_u32 v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:0 +// W64: encoding: [0xe9,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] + +v_subrev_f16 v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] + +v_subrev_f32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] + +v_xnor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: encoding: [0xea,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] + +v_xor_b32 v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0xe9,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s index 54baafb5366ffb..81fcb323e2711e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_dpp8.s @@ -1,7 +1,7 @@ -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefix=W64-ERR --implicit-check-not=error: %s v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s index 045d698bd504b7..b339bc1960f3e7 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_err.s @@ -1,226 +1,227 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=error %s -v_add_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v255, v1, v2, 0xfe0b -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_add_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v255, v1, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v255, v1, v2, 0xfe0b +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmaak_f16_e32 v5, v1, v255, 0xfe0b +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmaak_f16_e32 v5, v255, v2, 0xfe0b -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode - -v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v255, v2 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmaak_f16_e32 v5, v1, v255, 0xfe0b -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmac_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_fmamk_f16_e32 v255, v1, 0xfe0b, v3 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_fmamk_f16_e32 v5, v1, 0xfe0b, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_fmamk_f16_e32 v5, v255, 0xfe0b, v3 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_e32 v5, v1, v255 -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_ldexp_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction v_max_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_max_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_fmac_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_ldexp_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_max_num_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_max_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_min_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_add_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_fmac_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_min_num_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:19: error: invalid operand for instruction -v_max_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:27: error: invalid operand for instruction -v_min_num_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_min_num_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:23: error: invalid operand for instruction -v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_mul_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_mul_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode v_sub_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_ldexp_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_sub_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_add_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_fmac_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_max_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_f16_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_min_num_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_f16_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_mul_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_dpp v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_sub_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v255, v1, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode -v_subrev_f16_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +v_subrev_f16_e32 v5, v1, v255 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode + +v_subrev_f16_e32 v5, v255, v2 +// GFX12: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s index 13939842f73031..e9e91fa70773d3 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2_t16_promote.s @@ -1,190 +1,191 @@ +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --sort --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12 --implicit-check-not=_e32 %s v_add_f16 v255, v1, v2 -// GFX12: v_add_f16_e64 +// GFX12: v_add_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x32,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v255, v1, v2 -// GFX12: v_fmac_f16_e64 +v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_ldexp_f16 v255, v1, v2 -// GFX12: v_ldexp_f16_e64 +v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v255, v1, v2 -// GFX12: v_max_num_f16_e64 +v_add_f16 v5, v1, v255 +// GFX12: v_add_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x32,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v255, v1, v2 -// GFX12: v_min_num_f16_e64 +v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v255, v1, v2 -// GFX12: v_mul_f16_e64 +v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v255, v1, v2 -// GFX12: v_sub_f16_e64 +v_add_f16 v5, v255, v2 +// GFX12: v_add_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x32,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v255, v1, v2 -// GFX12: v_subrev_f16_e64 +v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 -// GFX12: v_add_f16_e64 +v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_add_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] + +v_fmac_f16 v255, v1, v2 +// GFX12: v_fmac_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x36,0xd5,0x01,0x05,0x02,0x00] + +v_fmac_f16 v5, v1, v255 +// GFX12: v_fmac_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x36,0xd5,0x01,0xff,0x03,0x00] v_fmac_f16 v5, v255, v2 -// GFX12: v_fmac_f16_e64 +// GFX12: v_fmac_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x36,0xd5,0xff,0x05,0x02,0x00] -v_ldexp_f16 v5, v255, v2 -// GFX12: v_ldexp_f16_e64 +v_ldexp_f16 v255, v1, v2 +// GFX12: v_ldexp_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x3b,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 -// GFX12: v_max_num_f16_e64 +v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16 v5, v255, v2 -// GFX12: v_min_num_f16_e64 +v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 -// GFX12: v_mul_f16_e64 +v_ldexp_f16 v5, v255, v2 +// GFX12: v_ldexp_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x3b,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v255, v2 -// GFX12: v_sub_f16_e64 +v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v255, v2 -// GFX12: v_subrev_f16_e64 +v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_add_f16 v5, v1, v255 -// GFX12: v_add_f16_e64 +v_max_num_f16 v255, v1, v2 +// GFX12: v_max_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x31,0xd5,0x01,0x05,0x02,0x00] -v_fmac_f16 v5, v1, v255 -// GFX12: v_fmac_f16_e64 +v_max_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_max_num_f16 v5, v1, v255 -// GFX12: v_max_num_f16_e64 +// GFX12: v_max_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x31,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v5, v1, v255 -// GFX12: v_min_num_f16_e64 +v_max_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v5, v1, v255 -// GFX12: v_mul_f16_e64 +v_max_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v5, v1, v255 -// GFX12: v_sub_f16_e64 +v_max_num_f16 v5, v255, v2 +// GFX12: v_max_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x31,0xd5,0xff,0x05,0x02,0x00] -v_subrev_f16 v5, v1, v255 -// GFX12: v_subrev_f16_e64 +v_max_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_add_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_max_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_max_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_ldexp_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_min_num_f16 v255, v1, v2 +// GFX12: v_min_num_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x30,0xd5,0x01,0x05,0x02,0x00] -v_max_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_min_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] v_min_num_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +// GFX12: v_min_num_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +v_min_num_f16 v5, v1, v255 +// GFX12: v_min_num_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x30,0xd5,0x01,0xff,0x03,0x00] -v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_min_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_min_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_min_num_f16 v5, v255, v2 +// GFX12: v_min_num_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x30,0xd5,0xff,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_min_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] v_min_num_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +// GFX12: v_min_num_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +v_mul_f16 v255, v1, v2 +// GFX12: v_mul_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x35,0xd5,0x01,0x05,0x02,0x00] -v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_add_f16_e64 +v_mul_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_mul_f16 v5, v1, v255 +// GFX12: v_mul_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x35,0xd5,0x01,0xff,0x03,0x00] -v_min_num_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] v_mul_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_mul_f16_e64 - -v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_sub_f16_e64 +// GFX12: v_mul_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_mul_f16 v5, v255, v2 +// GFX12: v_mul_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x35,0xd5,0xff,0x05,0x02,0x00] -v_add_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_ldexp_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_mul_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_mul_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_max_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_sub_f16 v255, v1, v2 +// GFX12: v_sub_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x33,0xd5,0x01,0x05,0x02,0x00] -v_min_num_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_mul_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_sub_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_sub_f16 v5, v1, v255 +// GFX12: v_sub_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x33,0xd5,0x01,0xff,0x03,0x00] -v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_add_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_sub_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_ldexp_f16_e64 +v_sub_f16 v5, v255, v2 +// GFX12: v_sub_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x33,0xd5,0xff,0x05,0x02,0x00] -v_max_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_min_num_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_sub_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] -v_mul_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_subrev_f16 v255, v1, v2 +// GFX12: v_subrev_f16_e64 v255, v1, v2 ; encoding: [0xff,0x00,0x34,0xd5,0x01,0x05,0x02,0x00] -v_sub_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_subrev_f16 v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v255, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xff,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_subrev_f16 v255, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v255, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_add_f16_e64 +v_subrev_f16 v5, v1, v255 +// GFX12: v_subrev_f16_e64 v5, v1, v255 ; encoding: [0x05,0x00,0x34,0xd5,0x01,0xff,0x03,0x00] -v_max_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_max_num_f16_e64 +v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0xfe,0x03,0x00,0x01,0x77,0x39,0x05] -v_min_num_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_min_num_f16_e64 +v_subrev_f16 v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] -v_mul_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_mul_f16_e64 +v_subrev_f16 v5, v255, v2 +// GFX12: v_subrev_f16_e64 v5, v255, v2 ; encoding: [0x05,0x00,0x34,0xd5,0xff,0x05,0x02,0x00] -v_sub_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_sub_f16_e64 +v_subrev_f16 v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v255, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0xff,0x77,0x39,0x05] -v_subrev_f16 v5, v1, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_subrev_f16_e64 +v_subrev_f16 v5, v255, v2 quad_perm:[3,2,1,0] +// GFX12: v_subrev_f16_e64_dpp v5, v255, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0xff,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt index 26ffd3a4e383b2..4f638cd8ff54f3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt @@ -1,2334 +1,2324 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-FAKE16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding %s | FileCheck -strict-whitespace -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0x01,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x40] -0x01,0x05,0x0a,0x40 +0xff,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x40] -0xff,0x05,0x0a,0x40 +0x01,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x40] -0x01,0x04,0x0a,0x40 +0x69,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x40] -0x69,0x04,0x0a,0x40 +0x6a,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x40] -0x6a,0x04,0x0a,0x40 +0x6b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x40] -0x6b,0x04,0x0a,0x40 +0x7b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x40] -0x7b,0x04,0x0a,0x40 +0x7d,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x40] -0x7d,0x04,0x0a,0x40 +0x7e,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x40] -0x7e,0x04,0x0a,0x40 +0x7f,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x40] -0x7f,0x04,0x0a,0x40 +0x7c,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x40] -0x7c,0x04,0x0a,0x40 +0xc1,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x40] -0xc1,0x04,0x0a,0x40 +0xf0,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x40] -0xf0,0x04,0x0a,0x40 +0xfd,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x40] -0xfd,0x04,0x0a,0x40 +0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf # W32: v_add_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] # W64: v_add_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] -0x01,0x05,0x0a,0x64 +0x81,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x64] -0x81,0x05,0x0a,0x64 +0x7f,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] -0x7f,0x05,0x0a,0x64 +0xff,0x05,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x64] -0xff,0x05,0x0a,0x64 +0x01,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] -0x01,0x04,0x0a,0x64 +0x69,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] -0x69,0x04,0x0a,0x64 +0x6a,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] -0x6a,0x04,0x0a,0x64 +0x6b,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] -0x6b,0x04,0x0a,0x64 +0x7b,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] -0x7b,0x04,0x0a,0x64 +0x7d,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] -0x7d,0x04,0x0a,0x64 +0x7e,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] -0x7e,0x04,0x0a,0x64 +0x7f,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] -0x7f,0x04,0x0a,0x64 +0x7c,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] -0x7c,0x04,0x0a,0x64 +0xc1,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] -0xc1,0x04,0x0a,0x64 +0xf0,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] -0xf0,0x04,0x0a,0x64 +0xfd,0x04,0x0a,0x64 # GFX11-REAL16: v_add_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x64] # GFX11-FAKE16: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] -0xfd,0x04,0x0a,0x64 -# GFX11-REAL16: v_add_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x65] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x65 0xfd,0x04,0x0b,0x65 +# GFX11-REAL16: v_add_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x65] +0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_add_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_add_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_add_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x65,0x0b,0xfe,0x00,0x00] -# GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] 0x01,0x05,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] 0xff,0x05,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] 0x01,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] 0x69,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] 0x6a,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] 0x6b,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] 0x7b,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] 0x7d,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] 0x7e,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] 0x7f,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] 0x7c,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] 0xc1,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] 0xf0,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] 0xfd,0x04,0x0a,0x06 +# GFX11: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] -# GFX11: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf +# GFX11: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] -# GFX11: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] 0x01,0x05,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] 0xff,0x05,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] 0x01,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] 0x69,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] 0x6a,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] 0x6b,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] 0x7b,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] 0x7d,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] 0x7e,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] 0x7f,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] 0x7c,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] 0xc1,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] 0xf0,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] 0xfd,0x04,0x0a,0x4a +# GFX11: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] -# GFX11: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf +# GFX11: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] -# GFX11: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] 0x01,0x05,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] 0xff,0x05,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] 0x01,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] 0x69,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] 0x6a,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] 0x6b,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] 0x7b,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] 0x7d,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] 0x7e,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] 0x7f,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] 0x7c,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] 0xc1,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] 0xf0,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] 0xfd,0x04,0x0a,0x36 +# GFX11: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] -# GFX11: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf +# GFX11: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] -# GFX11: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] 0x01,0x05,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] 0xff,0x05,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] 0x01,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] 0x69,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] 0x6a,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] 0x6b,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] 0x7b,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] 0x7d,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] 0x7e,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] 0x7f,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] 0x7c,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] 0xc1,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] 0xf0,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] 0xfd,0x04,0x0a,0x34 +# GFX11: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] -# GFX11: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf +# GFX11: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] -0x01,0x05,0x0a,0x02 +0xff,0x05,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] -0xff,0x05,0x0a,0x02 +0x01,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] -0x01,0x04,0x0a,0x02 +0x69,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] -0x69,0x04,0x0a,0x02 +0x6a,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] -0x6a,0x04,0x0a,0x02 +0x6b,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] -0x6b,0x04,0x0a,0x02 +0x7b,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] -0x7b,0x04,0x0a,0x02 +0x7d,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] -0x7d,0x04,0x0a,0x02 +0x7e,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] -0x7e,0x04,0x0a,0x02 +0x7f,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] -0x7f,0x04,0x0a,0x02 +0x7c,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x02] -0x7c,0x04,0x0a,0x02 +0xc1,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] -0xc1,0x04,0x0a,0x02 +0xf0,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] -0xf0,0x04,0x0a,0x02 +0xfd,0x04,0x0a,0x02 # W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] # W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] -0xfd,0x04,0x0a,0x02 +0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf # W32: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] # W64: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] 0x01,0x05,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] 0xff,0x05,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] 0x01,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] 0x69,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] 0x6a,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] 0x6b,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] 0x7b,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] 0x7d,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] 0x7e,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] 0x7f,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] 0x7c,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] 0xc1,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] 0xf0,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] 0xfd,0x04,0x0a,0x5e +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] -# GFX11: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf +# GFX11: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] -# GFX11: v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] 0x01,0x05,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x04] 0xff,0x05,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x04] 0x01,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x04] 0x69,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x04] 0x6a,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x04] 0x6b,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x04] 0x7b,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x04] 0x7d,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x04] 0x7e,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x04] 0x7f,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x04] 0x7c,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x04] 0xc1,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x04] 0xf0,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x04] 0xfd,0x04,0x0a,0x04 +# GFX11: v_dot2acc_f32_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x04] -# GFX11: v_dot2acc_f32_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00 +# GFX11: v_dot2acc_f32_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x05,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX11: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf +# GFX11: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0c] 0x01,0x05,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0c] 0xff,0x05,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0c] 0x01,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0c] 0x69,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0c] 0x6a,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0c] 0x6b,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0c] 0x7b,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0c] 0x7d,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0c] 0x7e,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0c] 0x7f,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0c] 0x7c,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0c] 0xc1,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0c] 0xf0,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0c] 0xfd,0x04,0x0a,0x0c +# GFX11: v_fmac_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0c] -# GFX11: v_fmac_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf +# GFX11: v_fmac_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0d,0x56,0x34,0x12,0xaf] -# GFX11: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] 0x01,0x05,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] 0x7f,0x05,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] 0x01,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] 0x69,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] 0x6a,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] 0x6b,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] 0x7b,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] 0x7d,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] 0x7e,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] 0x7f,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] 0x7c,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] 0xc1,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] 0xf0,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] 0xfd,0x04,0x0a,0x6c +# GFX11: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] -# GFX11: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] 0x01,0x05,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] 0xff,0x05,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] 0x01,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] 0x69,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] 0x6a,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] 0x6b,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] 0x7b,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] 0x7d,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] 0x7e,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] 0x7f,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] 0x7c,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] 0xc1,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] 0xf0,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] 0xfd,0x04,0x0a,0x56 +# GFX11: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] -# GFX11: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf +# GFX11: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00 +# GFX11: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] -# GFX11: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf +# GFX11: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] 0x01,0x05,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] 0x7f,0x05,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] 0x01,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] 0x69,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] 0x6a,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] 0x6b,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] 0x7b,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] 0x7d,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76] 0x7e,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] 0x7f,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] 0x7c,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] 0xc1,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] 0xf0,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX11-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] 0xfd,0x04,0x0a,0x76 +# GFX11-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] +# GFX11-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX11-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] +# GFX11-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX11: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] 0x01,0x05,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] 0xff,0x05,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] 0x01,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] 0x69,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] 0x6a,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] 0x6b,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] 0x7b,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] 0x7d,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] 0x7e,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] 0x7f,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] 0x7c,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] 0xc1,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] 0xf0,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] 0xfd,0x04,0x0a,0x30 +# GFX11: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] -# GFX11: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf +# GFX11: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] -# GFX11: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] 0xff,0x05,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] 0x01,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] 0x69,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] 0x6a,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] 0x6b,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] 0x7b,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] 0x7d,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] 0x7e,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] 0x7f,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] 0x7c,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] 0xc1,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] 0xf0,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] 0xfd,0x04,0x0a,0x32 +# GFX11: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] -# GFX11: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf +# GFX11: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x72] -0x01,0x05,0x0a,0x72 +0x81,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x72] -0x81,0x05,0x0a,0x72 +0x7f,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x72] -0x7f,0x05,0x0a,0x72 +0xff,0x05,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x72] -0xff,0x05,0x0a,0x72 +0x01,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x72] -0x01,0x04,0x0a,0x72 +0x69,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x72] -0x69,0x04,0x0a,0x72 +0x6a,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x72] -0x6a,0x04,0x0a,0x72 +0x6b,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x72] -0x6b,0x04,0x0a,0x72 +0x7b,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x72] -0x7b,0x04,0x0a,0x72 +0x7d,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x72] -0x7d,0x04,0x0a,0x72 +0x7e,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x72] -0x7e,0x04,0x0a,0x72 +0x7f,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x72] -0x7f,0x04,0x0a,0x72 +0x7c,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x72] -0x7c,0x04,0x0a,0x72 +0xc1,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x72] -0xc1,0x04,0x0a,0x72 +0xf0,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x72] -0xf0,0x04,0x0a,0x72 +0xfd,0x04,0x0a,0x72 # GFX11-REAL16: v_max_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x72] # GFX11-FAKE16: v_max_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x72] -0xfd,0x04,0x0a,0x72 -# GFX11-REAL16: v_max_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x73] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x73 0xfd,0x04,0x0b,0x73 +# GFX11-REAL16: v_max_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x73] +0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_max_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_max_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x72,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_max_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_max_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x73,0x0b,0xfe,0x00,0x00] -# GFX11: v_max_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x20] 0x01,0x05,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x20] 0xff,0x05,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x20] 0x01,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x20] 0x69,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x20] 0x6a,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x20] 0x6b,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x20] 0x7b,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x20] 0x7d,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x20] 0x7e,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x20] 0x7f,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x20] 0x7c,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x20] 0xc1,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x20] 0xf0,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x20] 0xfd,0x04,0x0a,0x20 +# GFX11: v_max_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x20] -# GFX11: v_max_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf +# GFX11: v_max_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x21,0x56,0x34,0x12,0xaf] -# GFX11: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] 0x01,0x05,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] 0xff,0x05,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] 0x01,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] 0x69,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] 0x6a,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] 0x6b,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] 0x7b,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] 0x7d,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] 0x7e,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] 0x7f,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] 0x7c,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] 0xc1,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] 0xf0,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] 0xfd,0x04,0x0a,0x24 +# GFX11: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] -# GFX11: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf +# GFX11: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] -# GFX11: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] 0x01,0x05,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] 0xff,0x05,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] 0x01,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] 0x69,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] 0x6a,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] 0x6b,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] 0x7b,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] 0x7d,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] 0x7e,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] 0x7f,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] 0x7c,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] 0xc1,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] 0xf0,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] 0xfd,0x04,0x0a,0x28 +# GFX11: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] -# GFX11: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf +# GFX11: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x74] -0x01,0x05,0x0a,0x74 +0x81,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x74] -0x81,0x05,0x0a,0x74 +0x7f,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x74] -0x7f,0x05,0x0a,0x74 +0xff,0x05,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x74] -0xff,0x05,0x0a,0x74 +0x01,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x74] -0x01,0x04,0x0a,0x74 +0x69,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x74] -0x69,0x04,0x0a,0x74 +0x6a,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x74] -0x6a,0x04,0x0a,0x74 +0x6b,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x74] -0x6b,0x04,0x0a,0x74 +0x7b,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x74] -0x7b,0x04,0x0a,0x74 +0x7d,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x74] -0x7d,0x04,0x0a,0x74 +0x7e,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x74] -0x7e,0x04,0x0a,0x74 +0x7f,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x74] -0x7f,0x04,0x0a,0x74 +0x7c,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x74] -0x7c,0x04,0x0a,0x74 +0xc1,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x74] -0xc1,0x04,0x0a,0x74 +0xf0,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x74] -0xf0,0x04,0x0a,0x74 +0xfd,0x04,0x0a,0x74 # GFX11-REAL16: v_min_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x74] # GFX11-FAKE16: v_min_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x74] -0xfd,0x04,0x0a,0x74 -# GFX11-REAL16: v_min_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x75] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x75 0xfd,0x04,0x0b,0x75 +# GFX11-REAL16: v_min_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x75] +0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_min_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_min_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x74,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_min_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_min_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x75,0x0b,0xfe,0x00,0x00] -# GFX11: v_min_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1e] 0x01,0x05,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x1e] 0xff,0x05,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x1e] 0x01,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x1e] 0x69,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x1e] 0x6a,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x1e] 0x6b,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x1e] 0x7b,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x1e] 0x7d,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x1e] 0x7e,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x1e] 0x7f,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x1e] 0x7c,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x1e] 0xc1,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x1e] 0xf0,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x1e] 0xfd,0x04,0x0a,0x1e +# GFX11: v_min_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x1e] -# GFX11: v_min_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf +# GFX11: v_min_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x1f,0x56,0x34,0x12,0xaf] -# GFX11: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] 0x01,0x05,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] 0xff,0x05,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] 0x01,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] 0x69,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] 0x6a,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] 0x6b,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] 0x7b,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] 0x7d,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] 0x7e,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] 0x7f,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] 0x7c,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] 0xc1,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] 0xf0,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] 0xfd,0x04,0x0a,0x22 +# GFX11: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] -# GFX11: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf +# GFX11: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] -# GFX11: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] 0x01,0x05,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] 0xff,0x05,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] 0x01,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] 0x69,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] 0x6a,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] 0x6b,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] 0x7b,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] 0x7d,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] 0x7e,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] 0x7f,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] 0x7c,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] 0xc1,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] 0xf0,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] 0xfd,0x04,0x0a,0x26 +# GFX11: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] -# GFX11: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf +# GFX11: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] 0x01,0x05,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] 0xff,0x05,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] 0x01,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] 0x69,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] 0x6a,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] 0x6b,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] 0x7b,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] 0x7d,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] 0x7e,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] 0x7f,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] 0x7c,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] 0xc1,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] 0xf0,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] 0xfd,0x04,0x0a,0x0e +# GFX11: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] -# GFX11: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf +# GFX11: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] -0x01,0x05,0x0a,0x6a -# GFX11-REAL16: v_mul_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x6a] -# GFX11-FAKE16: v_mul_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x6a 0x81,0x05,0x0a,0x6a +# GFX11-REAL16: v_mul_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x6a] +# GFX11-FAKE16: v_mul_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x6a] +0x7f,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] -0x7f,0x05,0x0a,0x6a +0xff,0x05,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x6a] -0xff,0x05,0x0a,0x6a +0x01,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] -0x01,0x04,0x0a,0x6a +0x69,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] -0x69,0x04,0x0a,0x6a +0x6a,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6a] -0x6a,0x04,0x0a,0x6a +0x6b,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] -0x6b,0x04,0x0a,0x6a +0x7b,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] -0x7b,0x04,0x0a,0x6a +0x7d,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] -0x7d,0x04,0x0a,0x6a +0x7e,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] -0x7e,0x04,0x0a,0x6a +0x7f,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] -0x7f,0x04,0x0a,0x6a +0x7c,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] -0x7c,0x04,0x0a,0x6a +0xc1,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] -0xc1,0x04,0x0a,0x6a +0xf0,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] -0xf0,0x04,0x0a,0x6a +0xfd,0x04,0x0a,0x6a # GFX11-REAL16: v_mul_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x6a] # GFX11-FAKE16: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] -0xfd,0x04,0x0a,0x6a -# GFX11-REAL16: v_mul_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x6b] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x6b 0xfd,0x04,0x0b,0x6b +# GFX11-REAL16: v_mul_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x6b] +0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_mul_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_mul_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_mul_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x6b,0x0b,0xfe,0x00,0x00] -# GFX11: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] 0x01,0x05,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] 0xff,0x05,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] 0x01,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] 0x69,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] 0x6a,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] 0x6b,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] 0x7b,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] 0x7d,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] 0x7e,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] 0x7f,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] 0x7c,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] 0xc1,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] 0xf0,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] 0xfd,0x04,0x0a,0x10 +# GFX11: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] -# GFX11: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf +# GFX11: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] 0x01,0x05,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] 0xff,0x05,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] 0x01,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] 0x69,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] 0x6a,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] 0x6b,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] 0x7b,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] 0x7d,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] 0x7e,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] 0x7f,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] 0x7c,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] 0xc1,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] 0xf0,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] 0xfd,0x04,0x0a,0x14 +# GFX11: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] -# GFX11: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] 0x01,0x05,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] 0xff,0x05,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] 0x01,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] 0x69,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] 0x6a,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] 0x6b,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] 0x7b,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] 0x7d,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] 0x7e,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] 0x7f,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] 0x7c,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] 0xc1,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] 0xf0,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] 0xfd,0x04,0x0a,0x18 +# GFX11: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] -# GFX11: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf +# GFX11: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] 0x01,0x05,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] 0xff,0x05,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] 0x01,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] 0x69,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] 0x6a,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] 0x6b,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] 0x7b,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] 0x7d,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] 0x7e,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] 0x7f,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] 0x7c,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] 0xc1,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] 0xf0,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] 0xfd,0x04,0x0a,0x12 +# GFX11: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] -# GFX11: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf +# GFX11: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] -# GFX11: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] 0x01,0x05,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] 0xff,0x05,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] 0x01,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] 0x69,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] 0x6a,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] 0x6b,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] 0x7b,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] 0x7d,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] 0x7e,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] 0x7f,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] 0x7c,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] 0xc1,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] 0xf0,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] 0xfd,0x04,0x0a,0x16 +# GFX11: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] -# GFX11: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf +# GFX11: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] -# GFX11: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] 0x01,0x05,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] 0xff,0x05,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] 0x01,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] 0x69,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] 0x6a,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] 0x6b,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] 0x7b,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] 0x7d,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] 0x7e,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] 0x7f,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] 0x7c,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] 0xc1,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] 0xf0,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] 0xfd,0x04,0x0a,0x38 +# GFX11: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] -# GFX11: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf +# GFX11: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] -# GFX11: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] 0x69,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] 0x7b,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] 0x7d,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] 0xfd,0x04,0x0a,0x78 +# GFX11: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] -# GFX11: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00 +# GFX11: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] +0x01,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x42] -0x01,0x05,0x0a,0x42 +0xff,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x42] -0xff,0x05,0x0a,0x42 +0x01,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x42] -0x01,0x04,0x0a,0x42 +0x69,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x42] -0x69,0x04,0x0a,0x42 +0x6a,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x42] -0x6a,0x04,0x0a,0x42 +0x6b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x42] -0x6b,0x04,0x0a,0x42 +0x7b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x42] -0x7b,0x04,0x0a,0x42 +0x7d,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x42] -0x7d,0x04,0x0a,0x42 +0x7e,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x42] -0x7e,0x04,0x0a,0x42 +0x7f,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x42] -0x7f,0x04,0x0a,0x42 +0x7c,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x42] -0x7c,0x04,0x0a,0x42 +0xc1,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x42] -0xc1,0x04,0x0a,0x42 +0xf0,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x42] -0xf0,0x04,0x0a,0x42 +0xfd,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x42] -0xfd,0x04,0x0a,0x42 +0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf # W32: v_sub_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] # W64: v_sub_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] -0x01,0x05,0x0a,0x66 +0x81,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x66] -0x81,0x05,0x0a,0x66 +0x7f,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] -0x7f,0x05,0x0a,0x66 +0xff,0x05,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x66] -0xff,0x05,0x0a,0x66 +0x01,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] -0x01,0x04,0x0a,0x66 +0x69,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] -0x69,0x04,0x0a,0x66 +0x6a,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] -0x6a,0x04,0x0a,0x66 +0x6b,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] -0x6b,0x04,0x0a,0x66 +0x7b,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] -0x7b,0x04,0x0a,0x66 +0x7d,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] -0x7d,0x04,0x0a,0x66 +0x7e,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] -0x7e,0x04,0x0a,0x66 +0x7f,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] -0x7f,0x04,0x0a,0x66 +0x7c,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] -0x7c,0x04,0x0a,0x66 +0xc1,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] -0xc1,0x04,0x0a,0x66 +0xf0,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] -0xf0,0x04,0x0a,0x66 +0xfd,0x04,0x0a,0x66 # GFX11-REAL16: v_sub_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x66] # GFX11-FAKE16: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] -0xfd,0x04,0x0a,0x66 -# GFX11-REAL16: v_sub_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x67] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x67 0xfd,0x04,0x0b,0x67 +# GFX11-REAL16: v_sub_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x67] +0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_sub_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_sub_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_sub_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x67,0x0b,0xfe,0x00,0x00] -# GFX11: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] 0x01,0x05,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] 0xff,0x05,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] 0x01,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] 0x69,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] 0x6a,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] 0x6b,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] 0x7b,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] 0x7d,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] 0x7e,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] 0x7f,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] 0x7c,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] 0xc1,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] 0xf0,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] 0xfd,0x04,0x0a,0x08 +# GFX11: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] -# GFX11: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf +# GFX11: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] -# GFX11: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] 0xff,0x05,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] 0x01,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] 0x69,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] 0x6a,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] 0x6b,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] 0x7b,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] 0x7d,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] 0x7e,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] 0x7f,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] 0x7c,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] 0xc1,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] 0xf0,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] 0xfd,0x04,0x0a,0x4c +# GFX11: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] -# GFX11: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf +# GFX11: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x44] -0x01,0x05,0x0a,0x44 +0xff,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x44] -0xff,0x05,0x0a,0x44 +0x01,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x44] -0x01,0x04,0x0a,0x44 +0x69,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x44] -0x69,0x04,0x0a,0x44 +0x6a,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x44] -0x6a,0x04,0x0a,0x44 +0x6b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x44] -0x6b,0x04,0x0a,0x44 +0x7b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x44] -0x7b,0x04,0x0a,0x44 +0x7d,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x44] -0x7d,0x04,0x0a,0x44 +0x7e,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x44] -0x7e,0x04,0x0a,0x44 +0x7f,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x44] -0x7f,0x04,0x0a,0x44 +0x7c,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x44] -0x7c,0x04,0x0a,0x44 +0xc1,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x44] -0xc1,0x04,0x0a,0x44 +0xf0,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x44] -0xf0,0x04,0x0a,0x44 +0xfd,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x44] -0xfd,0x04,0x0a,0x44 +0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf # W32: v_subrev_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] # W64: v_subrev_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf +0x01,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] -0x01,0x05,0x0a,0x68 +0x81,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v1.h, v2.l ; encoding: [0x81,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v129/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0x81,0x05,0x0a,0x68] -0x81,0x05,0x0a,0x68 +0x7f,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] -0x7f,0x05,0x0a,0x68 +0xff,0x05,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, v127.h, v2.l ; encoding: [0xff,0x05,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, v255/*Invalid register, operand has 'VS_32_Lo128' register class*/, v2 ; encoding: [0xff,0x05,0x0a,0x68] -0xff,0x05,0x0a,0x68 +0x01,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] -0x01,0x04,0x0a,0x68 +0x69,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] -0x69,0x04,0x0a,0x68 +0x6a,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] -0x6a,0x04,0x0a,0x68 +0x6b,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] -0x6b,0x04,0x0a,0x68 +0x7b,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] -0x7b,0x04,0x0a,0x68 +0x7d,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] -0x7d,0x04,0x0a,0x68 +0x7e,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] -0x7e,0x04,0x0a,0x68 +0x7f,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] -0x7f,0x04,0x0a,0x68 +0x7c,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] -0x7c,0x04,0x0a,0x68 +0xc1,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] -0xc1,0x04,0x0a,0x68 +0xf0,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] -0xf0,0x04,0x0a,0x68 +0xfd,0x04,0x0a,0x68 # GFX11-REAL16: v_subrev_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x68] # GFX11-FAKE16: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] -0xfd,0x04,0x0a,0x68 -# GFX11-REAL16: v_subrev_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x69] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xfd,0x04,0x0b,0x69 0xfd,0x04,0x0b,0x69 +# GFX11-REAL16: v_subrev_f16_e32 v5.h, src_scc, v2.h ; encoding: [0xfd,0x04,0x0b,0x69] +0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 # GFX11-REAL16: v_subrev_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] # GFX11-FAKE16: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] -0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 -# GFX11-REAL16: v_subrev_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00] -# COM: TODO: GFX11-FAKE16: warning: invalid instruction encoding 0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00 0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00 +# GFX11-REAL16: v_subrev_f16_e32 v127.h, 0xfe0b, v127.h ; encoding: [0xff,0xfe,0xff,0x69,0x0b,0xfe,0x00,0x00] -# GFX11: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] 0x01,0x05,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] 0xff,0x05,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] 0x01,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] 0x69,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] 0x6a,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] 0x6b,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] 0x7b,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] 0x7d,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] 0x7e,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] 0x7f,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] 0x7c,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] 0xc1,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] 0xf0,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] 0xfd,0x04,0x0a,0x0a +# GFX11: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] -# GFX11: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf +# GFX11: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] -# GFX11: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] 0x01,0x05,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] 0xff,0x05,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] 0x01,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] 0x69,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] 0x6a,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] 0x6b,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] 0x7b,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] 0x7d,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] 0x7e,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] 0x7f,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] 0x7c,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] 0xc1,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] 0xf0,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] 0xfd,0x04,0x0a,0x4e +# GFX11: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] -# GFX11: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf +# GFX11: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] -# GFX11: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] 0x01,0x05,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] 0xff,0x05,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] 0x01,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] 0x69,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] 0x6a,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] 0x6b,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] 0x7b,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] 0x7d,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] 0x7e,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] 0x7f,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] 0x7c,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] 0xc1,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] 0xf0,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] 0xfd,0x04,0x0a,0x3c +# GFX11: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] -# GFX11: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf +# GFX11: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] -# GFX11: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] 0x01,0x05,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] 0xff,0x05,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] 0x01,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] 0x69,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] 0x6a,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] 0x6b,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] 0x7b,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] 0x7d,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] 0x7e,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] 0x7f,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] 0x7c,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] 0xc1,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] 0xf0,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] 0xfd,0x04,0x0a,0x3a +# GFX11: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] -# GFX11: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf +# GFX11: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt index eebf0cc13cee63..a8a40f883cc485 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp16.txt @@ -1,1750 +1,1851 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 -# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] -# GFX11: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] -# GFX11: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] -# GFX11: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_add_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] -# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff +# GFX11: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] -# GFX11: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01 +# GFX11: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] -# GFX11: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13 +# GFX11: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] -# GFX11: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30 +# GFX11: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] -# GFX11: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30 +# GFX11: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] -# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff +# GFX11: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] -# GFX11: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01 +# GFX11: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] -# GFX11: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13 +# GFX11: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] -# GFX11: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30 +# GFX11: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] -# GFX11: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30 +# GFX11: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff # W32: v_cndmask_b32_dpp v5, -v1, |v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff] # W64: v_cndmask_b32_dpp v5, -v1, |v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x90,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff # W32: v_cndmask_b32_dpp v5, |v1|, -v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff] # W64: v_cndmask_b32_dpp v5, |v1|, -v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x60,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff # W32: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] # W64: v_cndmask_b32_dpp v5, -|v1|, -|v2|, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0xf0,0xff -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1b,0x00,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0xe4,0x00,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x40,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x41,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x01,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x0f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x11,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x1f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x21,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x2f,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x50,0x01,0xff] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x5f,0x01,0x01] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x04,0x01,0x60,0x01,0x13] -# GFX11: v_dot2acc_f32_f16_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30 +# GFX11: v_dot2acc_f32_f16_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x05,0xff,0x6f,0xfd,0x30] -# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01 +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] -# GFX11: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13 +# GFX11: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] -# GFX11: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30 +# GFX11: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] -# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01 +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] -# GFX11: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13 +# GFX11: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] -# GFX11: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30 +# GFX11: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] -# GFX11: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] 0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30 +# GFX11-REAL16: v_ldexp_f16_dpp v127.l, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] +# GFX11-FAKE16: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] -# GFX11: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] -# GFX11: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30 +# GFX11: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] -# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1b,0x00,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0xe4,0x00,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x40,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x41,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x01,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x0f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x11,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x1f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x21,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x2f,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x50,0x01,0xff] -# GFX11: v_max_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x5f,0x01,0x01] -# GFX11: v_max_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x72,0x01,0x60,0x01,0x13] -# GFX11: v_max_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_max_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_max_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x72,0x7f,0x6f,0xfd,0x30] -# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1b,0x00,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0xe4,0x00,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x40,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x41,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x01,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x0f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x11,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x1f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x21,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x2f,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff +# GFX11: v_max_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x50,0x01,0xff] -# GFX11: v_max_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01 +# GFX11: v_max_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x5f,0x01,0x01] -# GFX11: v_max_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13 +# GFX11: v_max_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x20,0x01,0x60,0x01,0x13] -# GFX11: v_max_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30 +# GFX11: v_max_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x21,0xff,0x6f,0xfd,0x30] -# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff +# GFX11: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] -# GFX11: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01 +# GFX11: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] -# GFX11: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13 +# GFX11: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] -# GFX11: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] -# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff +# GFX11: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] -# GFX11: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01 +# GFX11: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] -# GFX11: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13 +# GFX11: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] -# GFX11: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30 +# GFX11: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] -# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1b,0x00,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0xe4,0x00,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x40,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x41,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x01,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x0f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x11,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x1f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x21,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x2f,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x50,0x01,0xff] -# GFX11: v_min_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x5f,0x01,0x01] -# GFX11: v_min_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x74,0x01,0x60,0x01,0x13] -# GFX11: v_min_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_min_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_min_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x74,0x7f,0x6f,0xfd,0x30] -# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1b,0x00,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0xe4,0x00,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x40,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x41,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x01,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x0f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x11,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x1f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x21,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x2f,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff +# GFX11: v_min_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x50,0x01,0xff] -# GFX11: v_min_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01 +# GFX11: v_min_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x5f,0x01,0x01] -# GFX11: v_min_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13 +# GFX11: v_min_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x1e,0x01,0x60,0x01,0x13] -# GFX11: v_min_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30 +# GFX11: v_min_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x1f,0xff,0x6f,0xfd,0x30] -# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff +# GFX11: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] -# GFX11: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01 +# GFX11: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] -# GFX11: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13 +# GFX11: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] -# GFX11: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] -# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff +# GFX11: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] -# GFX11: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01 +# GFX11: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] -# GFX11: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13 +# GFX11: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] -# GFX11: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30 +# GFX11: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13 +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] -# GFX11: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30 +# GFX11: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] -# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] -# GFX11: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_mul_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] -# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff +# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13 +# GFX11: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] -# GFX11: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30 +# GFX11: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] -# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] -# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] -# GFX11: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] -# GFX11: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30 +# GFX11: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] -# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff +# GFX11: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] -# GFX11: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01 +# GFX11: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] -# GFX11: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13 +# GFX11: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] -# GFX11: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30 +# GFX11: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 -# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] -# GFX11: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_sub_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] -# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff +# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01 +# GFX11: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13 +# GFX11: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] -# GFX11: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30 +# GFX11: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] -# GFX11: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30 +# GFX11: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 -# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30 +# GFX11-REAL16: v_subrev_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] +# GFX11-FAKE16: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] -# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01 +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13 +# GFX11: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30 +# GFX11: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] -# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30 +# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] -# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01 +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] -# GFX11: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13 +# GFX11: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] -# GFX11: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30 +# GFX11: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] -# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff +# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01 +# GFX11: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] -# GFX11: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13 +# GFX11: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] -# GFX11: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30 +# GFX11: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt index 5f1d4d4b33cbd1..a1d2c34f09f2b2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2_dpp8.txt @@ -1,250 +1,267 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W32,GFX11-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX11,W64,GFX11-FAKE16 %s +0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 -# GFX11: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] -# GFX11: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_add_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] -# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05 +# GFX11: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] -# GFX11: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00 +# GFX11: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] -# GFX11: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05 +# GFX11: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] -# GFX11: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00 +# GFX11: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] -# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05 +# GFX11: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] -# GFX11: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00 +# GFX11: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] -# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05 +# GFX11: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] -# GFX11: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00 +# GFX11: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] -# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00 +# GFX11: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] -# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05 +# GFX11: v_dot2acc_f32_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x04,0x01,0x77,0x39,0x05] -# GFX11: v_dot2acc_f32_f16_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00 +# GFX11: v_dot2acc_f32_f16_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x05,0xff,0x00,0x00,0x00] -# GFX11: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05 +# GFX11: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] -# GFX11: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00 +# GFX11: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] -# GFX11: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05 +# GFX11: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] -# GFX11: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00 +# GFX11: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] -# GFX11: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] -# GFX11: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_ldexp_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] -# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05 +# GFX11: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] -# GFX11: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00 +# GFX11: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] -# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05 +# GFX11: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] -# GFX11: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00 +# GFX11: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -# GFX11: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_max_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_max_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x72,0x01,0x77,0x39,0x05] -# GFX11: v_max_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_max_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_max_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x72,0x7f,0x00,0x00,0x00] -# GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05 +# GFX11: v_max_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x20,0x01,0x77,0x39,0x05] -# GFX11: v_max_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00 +# GFX11: v_max_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x21,0xff,0x00,0x00,0x00] -# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05 +# GFX11: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] -# GFX11: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00 +# GFX11: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] -# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05 +# GFX11: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] -# GFX11: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00 +# GFX11: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -# GFX11: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_min_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_min_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x74,0x01,0x77,0x39,0x05] -# GFX11: v_min_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_min_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_min_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x74,0x7f,0x00,0x00,0x00] -# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05 +# GFX11: v_min_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x1e,0x01,0x77,0x39,0x05] -# GFX11: v_min_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00 +# GFX11: v_min_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x1f,0xff,0x00,0x00,0x00] -# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05 +# GFX11: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] -# GFX11: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00 +# GFX11: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] -# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05 +# GFX11: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] -# GFX11: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00 +# GFX11: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] -# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05 +# GFX11: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] -# GFX11: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00 +# GFX11: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] -# GFX11: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] -# GFX11: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_mul_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] -# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05 +# GFX11: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] -# GFX11: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00 +# GFX11: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] -# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05 +# GFX11: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] -# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00 +# GFX11: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] -# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05 +# GFX11: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] -# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00 +# GFX11: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] -# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05 +# GFX11: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] -# GFX11: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00 +# GFX11: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] -# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05 +# GFX11: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] -# GFX11: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00 +# GFX11: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] -# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05 +# GFX11: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] -# GFX11: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00 +# GFX11: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 -# GFX11: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] -# GFX11: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_sub_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] -# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05 +# GFX11: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] -# GFX11: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00 +# GFX11: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] -# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 +# GFX11: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] -# GFX11: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00 +# GFX11: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 -# GFX11: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05 +# GFX11-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] +# GFX11-FAKE16: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00 +# GFX11-REAL16: v_subrev_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] +# GFX11-FAKE16: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] -# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05 +# GFX11: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00 +# GFX11: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] -# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05 +# GFX11: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] -# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00 +# GFX11: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] -# GFX11: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05 +# GFX11: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] -# GFX11: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00 +# GFX11: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] -# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05 +# GFX11: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] -# GFX11: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00 +# GFX11: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt index 08e9bef8cf6785..233c2e1b9d083b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt @@ -160,6 +160,12 @@ # GFX12: image_load v[4:7], [v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00] 0x01,0x00,0xc0,0xd3,0x04,0x08,0x00,0x00,0x01,0x00,0x00,0x00 +# GFX12: image_load v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_load v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x00,0xc0,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_load_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0x40,0xc0,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -409,6 +415,12 @@ # GFX12: image_store v[1:4], [v2, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_store v[1:4], [v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_store v[1:4], [v3, v2, v1, v0], s[4:11] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x80,0xc1,0xd3,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_store_mip v[252:255], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00] 0x00,0xc0,0xc1,0xd3,0xfc,0x00,0x00,0x00,0x00,0x01,0x00,0x00 @@ -568,6 +580,12 @@ # GFX12: image_atomic_swap v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_atomic_swap v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_atomic_swap v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x80,0x42,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_atomic_cmpswap v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0xc0,0xc2,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 @@ -625,6 +643,12 @@ # GFX12: image_atomic_add_uint v1, [v2, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D ; encoding: [0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00] 0x01,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x00,0x00,0x00 +# GFX12: image_atomic_add_uint v1, [v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_3D ; encoding: [0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00] +0x02,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x02,0x01,0x00,0x00 + +# GFX12: image_atomic_add_uint v1, [v3, v2, v1, v0], s[4:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY ; encoding: [0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00] +0x07,0x00,0x43,0xd0,0x01,0x08,0x00,0x00,0x03,0x02,0x01,0x00 + # GFX12: image_atomic_sub_uint v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D ; encoding: [0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x40,0x43,0xd0,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt index 673db0664fc6ab..1276d898160b33 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt @@ -1,2228 +1,2336 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0x01,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x40] -0x01,0x05,0x0a,0x40 +0xff,0x05,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x40] -0xff,0x05,0x0a,0x40 +0x01,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x40] -0x01,0x04,0x0a,0x40 +0x69,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x40] -0x69,0x04,0x0a,0x40 +0x6a,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x40] -0x6a,0x04,0x0a,0x40 +0x6b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x40] -0x6b,0x04,0x0a,0x40 +0x7b,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x40] -0x7b,0x04,0x0a,0x40 +0x7d,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x40] -0x7d,0x04,0x0a,0x40 +0x7e,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x40] -0x7e,0x04,0x0a,0x40 +0x7f,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x40] -0x7f,0x04,0x0a,0x40 +0x7c,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x40] -0x7c,0x04,0x0a,0x40 +0xc1,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x40] -0xc1,0x04,0x0a,0x40 +0xf0,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x40] -0xf0,0x04,0x0a,0x40 +0xfd,0x04,0x0a,0x40 # W32: v_add_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x40] # W64: v_add_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x40] -0xfd,0x04,0x0a,0x40 +0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf # W32: v_add_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] # W64: v_add_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x41,0x56,0x34,0x12,0xaf -# GFX12: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] 0x01,0x05,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] 0x7f,0x05,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] 0x01,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] 0x69,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] 0x6a,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] 0x6b,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] 0x7b,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] 0x7d,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] 0x7e,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] 0x7f,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] 0x7c,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] 0xc1,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] 0xf0,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] 0xfd,0x04,0x0a,0x64 +# GFX12-REAL16: v_add_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x64] +# GFX12-FAKE16: v_add_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x64] -# GFX12: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_add_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_add_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x64,0x0b,0xfe,0x00,0x00] -# GFX12: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] 0x01,0x05,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] 0xff,0x05,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] 0x01,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] 0x69,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] 0x6a,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] 0x6b,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] 0x7b,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] 0x7d,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] 0x7e,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] 0x7f,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] 0x7c,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] 0xc1,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] 0xf0,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] 0xfd,0x04,0x0a,0x06 +# GFX12: v_add_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x06] -# GFX12: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf +# GFX12: v_add_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x07,0x56,0x34,0x12,0xaf] -# GFX12: v_add_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x04] 0x01,0x07,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x04] 0xfe,0x05,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x04] 0x00,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x04] 0x68,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x04] 0x6a,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x04] 0x7a,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x04] 0x7e,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x04] 0x7c,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x04] 0xc1,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x04] 0xf0,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x04] 0xfd,0x04,0x0a,0x04 +# GFX12: v_add_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x04] -# GFX12: v_add_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf +# GFX12: v_add_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x05,0x56,0x34,0x12,0xaf] -# GFX12: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] 0x01,0x05,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] 0xff,0x05,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] 0x01,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] 0x69,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] 0x6a,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] 0x6b,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] 0x7b,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] 0x7d,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] 0x7e,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] 0x7f,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] 0x7c,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] 0xc1,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] 0xf0,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] 0xfd,0x04,0x0a,0x4a +# GFX12: v_add_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4a] -# GFX12: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf +# GFX12: v_add_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4b,0x56,0x34,0x12,0xaf] -# GFX12: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] 0x01,0x05,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] 0xff,0x05,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] 0x01,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] 0x69,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] 0x6a,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] 0x6b,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] 0x7b,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] 0x7d,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] 0x7e,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] 0x7f,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] 0x7c,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] 0xc1,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] 0xf0,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] 0xfd,0x04,0x0a,0x36 +# GFX12: v_and_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x36] -# GFX12: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf +# GFX12: v_and_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x37,0x56,0x34,0x12,0xaf] -# GFX12: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] 0x01,0x05,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] 0xff,0x05,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] 0x01,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] 0x69,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] 0x6a,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] 0x6b,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] 0x7b,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] 0x7d,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] 0x7e,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] 0x7f,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] 0x7c,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] 0xc1,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] 0xf0,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] 0xfd,0x04,0x0a,0x34 +# GFX12: v_ashrrev_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x34] -# GFX12: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf +# GFX12: v_ashrrev_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x35,0x56,0x34,0x12,0xaf] -# W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] 0x01,0x05,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] 0xff,0x05,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] 0x01,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] 0x69,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] 0x6a,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] 0x6b,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] 0x7b,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] 0x7d,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] 0x7e,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] 0x7f,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x02] 0x7c,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] 0xc1,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] 0xf0,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x02] -# W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] -# W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] 0xfd,0x04,0x0a,0x02 +# W32: v_cndmask_b32_e32 v5, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x02] +# W64: v_cndmask_b32_e32 v5, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x02] +0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf # W32: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] # W64: v_cndmask_b32_e32 v255, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x03,0x56,0x34,0x12,0xaf -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] 0x01,0x05,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] 0xff,0x05,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] 0x01,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] 0x69,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] 0x6a,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] 0x6b,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] 0x7b,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] 0x7d,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] 0x7e,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] 0x7f,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] 0x7c,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] 0xc1,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] 0xf0,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] 0xfd,0x04,0x0a,0x5e +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x5e] -# GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf +# GFX12: v_cvt_pk_rtz_f16_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x5f,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, v1, v2, 0xfe0b ; encoding: [0x01,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, v127, v2, 0xfe0b ; encoding: [0x7f,0x05,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, s1, v2, 0xfe0b ; encoding: [0x01,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, s105, v2, 0xfe0b ; encoding: [0x69,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, vcc_lo, v2, 0xfe0b ; encoding: [0x6a,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, vcc_hi, v2, 0xfe0b ; encoding: [0x6b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, ttmp15, v2, 0xfe0b ; encoding: [0x7b,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, m0, v2, 0xfe0b ; encoding: [0x7d,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, exec_lo, v2, 0xfe0b ; encoding: [0x7e,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, exec_hi, v2, 0xfe0b ; encoding: [0x7f,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, null, v2, 0xfe0b ; encoding: [0x7c,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, -1, v2, 0xfe0b ; encoding: [0xc1,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, 0.5, v2, 0xfe0b ; encoding: [0xf0,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] 0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v5, src_scc, v2, 0xfe0b ; encoding: [0xfd,0x04,0x0a,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmaak_f16 v127, 0xfe0b, v127, 0xfe0b ; encoding: [0xff,0xfe,0xfe,0x70,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, v1, v2, 0xaf123456 ; encoding: [0x01,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, v255, v2, 0xaf123456 ; encoding: [0xff,0x05,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, s1, v2, 0xaf123456 ; encoding: [0x01,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, s105, v2, 0xaf123456 ; encoding: [0x69,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, vcc_lo, v2, 0xaf123456 ; encoding: [0x6a,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, vcc_hi, v2, 0xaf123456 ; encoding: [0x6b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, ttmp15, v2, 0xaf123456 ; encoding: [0x7b,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, m0, v2, 0xaf123456 ; encoding: [0x7d,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, exec_lo, v2, 0xaf123456 ; encoding: [0x7e,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, exec_hi, v2, 0xaf123456 ; encoding: [0x7f,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, null, v2, 0xaf123456 ; encoding: [0x7c,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, -1, v2, 0xaf123456 ; encoding: [0xc1,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, 0.5, v2, 0xaf123456 ; encoding: [0xf0,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] 0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v5, src_scc, v2, 0xaf123456 ; encoding: [0xfd,0x04,0x0a,0x5a,0x56,0x34,0x12,0xaf] -# GFX12: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf +# GFX12: v_fmaak_f32 v255, 0xaf123456, v255, 0xaf123456 ; encoding: [0xff,0xfe,0xff,0x5b,0x56,0x34,0x12,0xaf] -# GFX12: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] 0x01,0x05,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] 0x7f,0x05,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] 0x01,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] 0x69,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] 0x6a,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] 0x6b,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] 0x7b,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] 0x7d,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] 0x7e,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] 0x7f,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] 0x7c,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] 0xc1,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] 0xf0,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] 0xfd,0x04,0x0a,0x6c +# GFX12: v_fmac_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6c] -# GFX12: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmac_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6c,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] 0x01,0x05,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] 0xff,0x05,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] 0x01,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] 0x69,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] 0x6a,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] 0x6b,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] 0x7b,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] 0x7d,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] 0x7e,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] 0x7f,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] 0x7c,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] 0xc1,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] 0xf0,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] 0xfd,0x04,0x0a,0x56 +# GFX12: v_fmac_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x56] -# GFX12: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf +# GFX12: v_fmac_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x57,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, v1, 0xfe0b, v3 ; encoding: [0x01,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, v127, 0xfe0b, v3 ; encoding: [0x7f,0x07,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, s1, 0xfe0b, v3 ; encoding: [0x01,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, s105, 0xfe0b, v3 ; encoding: [0x69,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, vcc_lo, 0xfe0b, v3 ; encoding: [0x6a,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, vcc_hi, 0xfe0b, v3 ; encoding: [0x6b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, ttmp15, 0xfe0b, v3 ; encoding: [0x7b,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, m0, 0xfe0b, v3 ; encoding: [0x7d,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, exec_lo, 0xfe0b, v3 ; encoding: [0x7e,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, exec_hi, 0xfe0b, v3 ; encoding: [0x7f,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, null, 0xfe0b, v3 ; encoding: [0x7c,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, -1, 0xfe0b, v3 ; encoding: [0xc1,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, 0.5, 0xfe0b, v3 ; encoding: [0xf0,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] 0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v5, src_scc, 0xfe0b, v3 ; encoding: [0xfd,0x06,0x0a,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00 +# GFX12: v_fmamk_f16 v127, 0xfe0b, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6e,0x0b,0xfe,0x00,0x00] -# GFX12: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, v1, 0xaf123456, v3 ; encoding: [0x01,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, v255, 0xaf123456, v3 ; encoding: [0xff,0x07,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, s1, 0xaf123456, v3 ; encoding: [0x01,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, s105, 0xaf123456, v3 ; encoding: [0x69,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, vcc_lo, 0xaf123456, v3 ; encoding: [0x6a,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, vcc_hi, 0xaf123456, v3 ; encoding: [0x6b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, ttmp15, 0xaf123456, v3 ; encoding: [0x7b,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, m0, 0xaf123456, v3 ; encoding: [0x7d,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, exec_lo, 0xaf123456, v3 ; encoding: [0x7e,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, exec_hi, 0xaf123456, v3 ; encoding: [0x7f,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, null, 0xaf123456, v3 ; encoding: [0x7c,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, -1, 0xaf123456, v3 ; encoding: [0xc1,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, 0.5, 0xaf123456, v3 ; encoding: [0xf0,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] 0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v5, src_scc, 0xaf123456, v3 ; encoding: [0xfd,0x06,0x0a,0x58,0x56,0x34,0x12,0xaf] -# GFX12: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf +# GFX12: v_fmamk_f32 v255, 0xaf123456, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x59,0x56,0x34,0x12,0xaf] -# GFX12: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] 0x01,0x05,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] 0x7f,0x05,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] 0x01,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] 0x69,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] 0x6a,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] 0x6b,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] 0x7b,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] 0x7d,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] 0x7e,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] 0x7f,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] 0x7c,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] 0xc1,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] 0xf0,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] 0xfd,0x04,0x0a,0x76 +# GFX12-REAL16: v_ldexp_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x76] +# GFX12-FAKE16: v_ldexp_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x76] -# GFX12: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_ldexp_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_ldexp_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x76,0x0b,0xfe,0x00,0x00] -# GFX12: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] 0x01,0x05,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] 0xff,0x05,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] 0x01,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] 0x69,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] 0x6a,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] 0x6b,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] 0x7b,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] 0x7d,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] 0x7e,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] 0x7f,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] 0x7c,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] 0xc1,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] 0xf0,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] 0xfd,0x04,0x0a,0x30 +# GFX12: v_lshlrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x30] -# GFX12: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf +# GFX12: v_lshlrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x31,0x56,0x34,0x12,0xaf] -# GFX12: v_lshlrev_b64_e32 v[5:6], v1, v[3:4] ; encoding: [0x01,0x07,0x0a,0x3e] 0x01,0x07,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], v1, v[3:4] ; encoding: [0x01,0x07,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], v255, v[2:3] ; encoding: [0xff,0x05,0x0a,0x3e] 0xff,0x05,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], v255, v[2:3] ; encoding: [0xff,0x05,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], s1, v[2:3] ; encoding: [0x01,0x04,0x0a,0x3e] 0x01,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], s1, v[2:3] ; encoding: [0x01,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], s105, v[2:3] ; encoding: [0x69,0x04,0x0a,0x3e] 0x69,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], s105, v[2:3] ; encoding: [0x69,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_lo, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x3e] 0x6a,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_lo, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_hi, v[2:3] ; encoding: [0x6b,0x04,0x0a,0x3e] 0x6b,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], vcc_hi, v[2:3] ; encoding: [0x6b,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], ttmp15, v[2:3] ; encoding: [0x7b,0x04,0x0a,0x3e] 0x7b,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], ttmp15, v[2:3] ; encoding: [0x7b,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], exec_lo, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x3e] 0x7e,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], exec_lo, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], exec_hi, v[2:3] ; encoding: [0x7f,0x04,0x0a,0x3e] 0x7f,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], exec_hi, v[2:3] ; encoding: [0x7f,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x3e] 0x7c,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x3e] 0xc1,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x3e] 0xf0,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x3e] 0xfd,0x04,0x0a,0x3e +# GFX12: v_lshlrev_b64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x3e] -# GFX12: v_lshlrev_b64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf +# GFX12: v_lshlrev_b64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x3f,0x56,0x34,0x12,0xaf] -# GFX12: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] 0x01,0x05,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] 0xff,0x05,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] 0x01,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] 0x69,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] 0x6a,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] 0x6b,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] 0x7b,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] 0x7d,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] 0x7e,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] 0x7f,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] 0x7c,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] 0xc1,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] 0xf0,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] 0xfd,0x04,0x0a,0x32 +# GFX12: v_lshrrev_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x32] -# GFX12: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf +# GFX12: v_lshrrev_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x33,0x56,0x34,0x12,0xaf] -# GFX12: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] 0x01,0x05,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x62] 0x7f,0x05,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x62] 0x01,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x62] 0x69,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x62] 0x6a,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x62] 0x6b,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x62] 0x7b,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x62] 0x7d,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x62] 0x7e,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x62] 0x7f,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x62] 0x7c,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x62] 0xc1,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x62] 0xf0,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x62] 0xfd,0x04,0x0a,0x62 +# GFX12-REAL16: v_max_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x62] +# GFX12-FAKE16: v_max_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x62] -# GFX12: v_max_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_max_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_max_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x62,0x0b,0xfe,0x00,0x00] -# GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] 0x01,0x05,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2c] 0xff,0x05,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2c] 0x01,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2c] 0x69,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2c] 0x6a,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2c] 0x6b,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2c] 0x7b,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2c] 0x7d,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2c] 0x7e,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2c] 0x7f,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2c] 0x7c,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2c] 0xc1,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2c] 0xf0,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2c] 0xfd,0x04,0x0a,0x2c +# GFX12: v_max_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2c] -# GFX12: v_max_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf +# GFX12: v_max_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2d,0x56,0x34,0x12,0xaf] -# GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1c] 0x01,0x07,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1c] 0xfe,0x05,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1c] 0x00,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1c] 0x68,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1c] 0x6a,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1c] 0x7a,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1c] 0x7e,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1c] 0x7c,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1c] 0xc1,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1c] 0xf0,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1c] 0xfd,0x04,0x0a,0x1c +# GFX12: v_max_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1c] -# GFX12: v_max_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf +# GFX12: v_max_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1d,0x56,0x34,0x12,0xaf] -# GFX12: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] 0x01,0x05,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] 0xff,0x05,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] 0x01,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] 0x69,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] 0x6a,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] 0x6b,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] 0x7b,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] 0x7d,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] 0x7e,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] 0x7f,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] 0x7c,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] 0xc1,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] 0xf0,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] 0xfd,0x04,0x0a,0x24 +# GFX12: v_max_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x24] -# GFX12: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf +# GFX12: v_max_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x25,0x56,0x34,0x12,0xaf] -# GFX12: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] 0x01,0x05,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] 0xff,0x05,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] 0x01,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] 0x69,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] 0x6a,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] 0x6b,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] 0x7b,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] 0x7d,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] 0x7e,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] 0x7f,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] 0x7c,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] 0xc1,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] 0xf0,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] 0xfd,0x04,0x0a,0x28 +# GFX12: v_max_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x28] -# GFX12: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf +# GFX12: v_max_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x29,0x56,0x34,0x12,0xaf] -# GFX12: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] 0x01,0x05,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x60] 0x7f,0x05,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x60] 0x01,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x60] 0x69,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x60] 0x6a,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x60] 0x6b,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x60] 0x7b,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x60] 0x7d,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x60] 0x7e,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x60] 0x7f,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x60] 0x7c,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x60] 0xc1,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x60] 0xf0,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x60] 0xfd,0x04,0x0a,0x60 +# GFX12-REAL16: v_min_num_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x60] +# GFX12-FAKE16: v_min_num_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x60] -# GFX12: v_min_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_min_num_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_min_num_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x60,0x0b,0xfe,0x00,0x00] -# GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] 0x01,0x05,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2a] 0xff,0x05,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2a] 0x01,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2a] 0x69,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2a] 0x6a,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2a] 0x6b,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2a] 0x7b,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2a] 0x7d,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2a] 0x7e,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2a] 0x7f,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2a] 0x7c,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2a] 0xc1,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2a] 0xf0,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2a] 0xfd,0x04,0x0a,0x2a +# GFX12: v_min_num_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x2a] -# GFX12: v_min_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf +# GFX12: v_min_num_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x2b,0x56,0x34,0x12,0xaf] -# GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1a] 0x01,0x07,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1a] 0xfe,0x05,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1a] 0x00,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1a] 0x68,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1a] 0x6a,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1a] 0x7a,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1a] 0x7e,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1a] 0x7c,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1a] 0xc1,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1a] 0xf0,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1a] 0xfd,0x04,0x0a,0x1a +# GFX12: v_min_num_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x1a] -# GFX12: v_min_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf +# GFX12: v_min_num_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x1b,0x56,0x34,0x12,0xaf] -# GFX12: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] 0x01,0x05,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] 0xff,0x05,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] 0x01,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] 0x69,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] 0x6a,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] 0x6b,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] 0x7b,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] 0x7d,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] 0x7e,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] 0x7f,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] 0x7c,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] 0xc1,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] 0xf0,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] 0xfd,0x04,0x0a,0x22 +# GFX12: v_min_i32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x22] -# GFX12: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf +# GFX12: v_min_i32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x23,0x56,0x34,0x12,0xaf] -# GFX12: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] 0x01,0x05,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] 0xff,0x05,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] 0x01,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] 0x69,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] 0x6a,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] 0x6b,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] 0x7b,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] 0x7d,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] 0x7e,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] 0x7f,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] 0x7c,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] 0xc1,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] 0xf0,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] 0xfd,0x04,0x0a,0x26 +# GFX12: v_min_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x26] -# GFX12: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf +# GFX12: v_min_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x27,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] 0x01,0x05,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] 0xff,0x05,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] 0x01,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] 0x69,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] 0x6a,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] 0x6b,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] 0x7b,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] 0x7d,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] 0x7e,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] 0x7f,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] 0x7c,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] 0xc1,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] 0xf0,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] 0xfd,0x04,0x0a,0x0e +# GFX12: v_mul_dx9_zero_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0e] -# GFX12: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf +# GFX12: v_mul_dx9_zero_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0f,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] 0x01,0x05,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] 0x7f,0x05,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] 0x01,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] 0x69,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6a] 0x6a,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] 0x6b,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] 0x7b,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] 0x7d,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] 0x7e,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] 0x7f,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] 0x7c,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] 0xc1,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] 0xf0,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] 0xfd,0x04,0x0a,0x6a +# GFX12-REAL16: v_mul_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x6a] +# GFX12-FAKE16: v_mul_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x6a] -# GFX12: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_mul_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_mul_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x6a,0x0b,0xfe,0x00,0x00] -# GFX12: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] 0x01,0x05,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] 0xff,0x05,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] 0x01,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] 0x69,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] 0x6a,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] 0x6b,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] 0x7b,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] 0x7d,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] 0x7e,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] 0x7f,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] 0x7c,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] 0xc1,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] 0xf0,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] 0xfd,0x04,0x0a,0x10 +# GFX12: v_mul_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x10] -# GFX12: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf +# GFX12: v_mul_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x11,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x0c] 0x01,0x07,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], v[1:2], v[3:4] ; encoding: [0x01,0x07,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x0c] 0xfe,0x05,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], v[254:255], v[2:3] ; encoding: [0xfe,0x05,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x0c] 0x00,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], s[0:1], v[2:3] ; encoding: [0x00,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x0c] 0x68,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], s[104:105], v[2:3] ; encoding: [0x68,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x0c] 0x6a,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], vcc, v[2:3] ; encoding: [0x6a,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x0c] 0x7a,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], ttmp[14:15], v[2:3] ; encoding: [0x7a,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x0c] 0x7e,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], exec, v[2:3] ; encoding: [0x7e,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x0c] 0x7c,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], null, v[2:3] ; encoding: [0x7c,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x0c] 0xc1,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], -1, v[2:3] ; encoding: [0xc1,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x0c] 0xf0,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], 0.5, v[2:3] ; encoding: [0xf0,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x0c] 0xfd,0x04,0x0a,0x0c +# GFX12: v_mul_f64_e32 v[5:6], src_scc, v[2:3] ; encoding: [0xfd,0x04,0x0a,0x0c] -# GFX12: v_mul_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] 0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf +# GFX12: v_mul_f64_e32 v[254:255], 0xaf123456, v[254:255] ; encoding: [0xff,0xfc,0xfd,0x0d,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] 0x01,0x05,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] 0xff,0x05,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] 0x01,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] 0x69,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] 0x6a,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] 0x6b,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] 0x7b,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] 0x7d,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] 0x7e,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] 0x7f,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] 0x7c,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] 0xc1,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] 0xf0,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] 0xfd,0x04,0x0a,0x14 +# GFX12: v_mul_hi_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x14] -# GFX12: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf +# GFX12: v_mul_hi_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x15,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] 0x01,0x05,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] 0xff,0x05,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] 0x01,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] 0x69,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] 0x6a,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] 0x6b,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] 0x7b,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] 0x7d,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] 0x7e,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] 0x7f,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] 0x7c,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] 0xc1,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] 0xf0,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] 0xfd,0x04,0x0a,0x18 +# GFX12: v_mul_hi_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x18] -# GFX12: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf +# GFX12: v_mul_hi_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x19,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] 0x01,0x05,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] 0xff,0x05,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] 0x01,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] 0x69,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] 0x6a,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] 0x6b,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] 0x7b,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] 0x7d,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] 0x7e,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] 0x7f,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] 0x7c,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] 0xc1,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] 0xf0,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] 0xfd,0x04,0x0a,0x12 +# GFX12: v_mul_i32_i24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x12] -# GFX12: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf +# GFX12: v_mul_i32_i24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x13,0x56,0x34,0x12,0xaf] -# GFX12: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] 0x01,0x05,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] 0xff,0x05,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] 0x01,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] 0x69,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] 0x6a,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] 0x6b,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] 0x7b,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] 0x7d,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] 0x7e,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] 0x7f,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] 0x7c,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] 0xc1,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] 0xf0,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] 0xfd,0x04,0x0a,0x16 +# GFX12: v_mul_u32_u24_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x16] -# GFX12: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf +# GFX12: v_mul_u32_u24_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x17,0x56,0x34,0x12,0xaf] -# GFX12: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] 0x01,0x05,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] 0xff,0x05,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] 0x01,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] 0x69,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] 0x6a,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] 0x6b,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] 0x7b,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] 0x7d,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] 0x7e,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] 0x7f,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] 0x7c,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] 0xc1,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] 0xf0,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] 0xfd,0x04,0x0a,0x38 +# GFX12: v_or_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x38] -# GFX12: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf +# GFX12: v_or_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x39,0x56,0x34,0x12,0xaf] -# GFX12: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] 0x01,0x05,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] 0xff,0x05,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] 0x01,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] 0x69,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] 0x6a,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] 0x6b,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] 0x7b,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] 0x7d,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] 0x7e,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] 0x7f,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] 0x7c,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] 0xc1,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] 0xf0,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] 0xfd,0x04,0x0a,0x78 +# GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78] -# GFX12: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00 +# GFX12: v_pk_fmac_f16 v255, 0xfe0b, v255 ; encoding: [0xff,0xfe,0xff,0x79,0x0b,0xfe,0x00,0x00] +0x01,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x42] -0x01,0x05,0x0a,0x42 +0xff,0x05,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x42] -0xff,0x05,0x0a,0x42 +0x01,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x42] -0x01,0x04,0x0a,0x42 +0x69,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x42] -0x69,0x04,0x0a,0x42 +0x6a,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x42] -0x6a,0x04,0x0a,0x42 +0x6b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x42] -0x6b,0x04,0x0a,0x42 +0x7b,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x42] -0x7b,0x04,0x0a,0x42 +0x7d,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x42] -0x7d,0x04,0x0a,0x42 +0x7e,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x42] -0x7e,0x04,0x0a,0x42 +0x7f,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x42] -0x7f,0x04,0x0a,0x42 +0x7c,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x42] -0x7c,0x04,0x0a,0x42 +0xc1,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x42] -0xc1,0x04,0x0a,0x42 +0xf0,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x42] -0xf0,0x04,0x0a,0x42 +0xfd,0x04,0x0a,0x42 # W32: v_sub_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x42] # W64: v_sub_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x42] -0xfd,0x04,0x0a,0x42 +0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf # W32: v_sub_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] # W64: v_sub_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x43,0x56,0x34,0x12,0xaf -# GFX12: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] 0x01,0x05,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] 0x7f,0x05,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] 0x01,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] 0x69,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] 0x6a,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] 0x6b,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] 0x7b,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] 0x7d,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] 0x7e,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] 0x7f,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] 0x7c,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] 0xc1,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] 0xf0,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] 0xfd,0x04,0x0a,0x66 +# GFX12-REAL16: v_sub_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x66] +# GFX12-FAKE16: v_sub_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x66] -# GFX12: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_sub_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_sub_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x66,0x0b,0xfe,0x00,0x00] -# GFX12: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] 0x01,0x05,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] 0xff,0x05,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] 0x01,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] 0x69,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] 0x6a,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] 0x6b,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] 0x7b,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] 0x7d,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] 0x7e,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] 0x7f,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] 0x7c,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] 0xc1,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] 0xf0,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] 0xfd,0x04,0x0a,0x08 +# GFX12: v_sub_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x08] -# GFX12: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf +# GFX12: v_sub_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x09,0x56,0x34,0x12,0xaf] -# GFX12: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] 0x01,0x05,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] 0xff,0x05,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] 0x01,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] 0x69,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] 0x6a,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] 0x6b,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] 0x7b,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] 0x7d,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] 0x7e,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] 0x7f,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] 0x7c,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] 0xc1,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] 0xf0,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] 0xfd,0x04,0x0a,0x4c +# GFX12: v_sub_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4c] -# GFX12: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf +# GFX12: v_sub_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4d,0x56,0x34,0x12,0xaf] +0x01,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v1, v2, vcc_lo ; encoding: [0x01,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v1, v2, vcc ; encoding: [0x01,0x05,0x0a,0x44] -0x01,0x05,0x0a,0x44 +0xff,0x05,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, v255, v2, vcc_lo ; encoding: [0xff,0x05,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, v255, v2, vcc ; encoding: [0xff,0x05,0x0a,0x44] -0xff,0x05,0x0a,0x44 +0x01,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s1, v2, vcc_lo ; encoding: [0x01,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s1, v2, vcc ; encoding: [0x01,0x04,0x0a,0x44] -0x01,0x04,0x0a,0x44 +0x69,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, s105, v2, vcc_lo ; encoding: [0x69,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, s105, v2, vcc ; encoding: [0x69,0x04,0x0a,0x44] -0x69,0x04,0x0a,0x44 +0x6a,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_lo, v2, vcc_lo ; encoding: [0x6a,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_lo, v2, vcc ; encoding: [0x6a,0x04,0x0a,0x44] -0x6a,0x04,0x0a,0x44 +0x6b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, vcc_hi, v2, vcc_lo ; encoding: [0x6b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, vcc_hi, v2, vcc ; encoding: [0x6b,0x04,0x0a,0x44] -0x6b,0x04,0x0a,0x44 +0x7b,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, ttmp15, v2, vcc_lo ; encoding: [0x7b,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, ttmp15, v2, vcc ; encoding: [0x7b,0x04,0x0a,0x44] -0x7b,0x04,0x0a,0x44 +0x7d,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, m0, v2, vcc_lo ; encoding: [0x7d,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, m0, v2, vcc ; encoding: [0x7d,0x04,0x0a,0x44] -0x7d,0x04,0x0a,0x44 +0x7e,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_lo, v2, vcc_lo ; encoding: [0x7e,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_lo, v2, vcc ; encoding: [0x7e,0x04,0x0a,0x44] -0x7e,0x04,0x0a,0x44 +0x7f,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, exec_hi, v2, vcc_lo ; encoding: [0x7f,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, exec_hi, v2, vcc ; encoding: [0x7f,0x04,0x0a,0x44] -0x7f,0x04,0x0a,0x44 +0x7c,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, null, v2, vcc_lo ; encoding: [0x7c,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, null, v2, vcc ; encoding: [0x7c,0x04,0x0a,0x44] -0x7c,0x04,0x0a,0x44 +0xc1,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, -1, v2, vcc_lo ; encoding: [0xc1,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, -1, v2, vcc ; encoding: [0xc1,0x04,0x0a,0x44] -0xc1,0x04,0x0a,0x44 +0xf0,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, 0.5, v2, vcc_lo ; encoding: [0xf0,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, 0.5, v2, vcc ; encoding: [0xf0,0x04,0x0a,0x44] -0xf0,0x04,0x0a,0x44 +0xfd,0x04,0x0a,0x44 # W32: v_subrev_co_ci_u32_e32 v5, vcc_lo, src_scc, v2, vcc_lo ; encoding: [0xfd,0x04,0x0a,0x44] # W64: v_subrev_co_ci_u32_e32 v5, vcc, src_scc, v2, vcc ; encoding: [0xfd,0x04,0x0a,0x44] -0xfd,0x04,0x0a,0x44 +0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf # W32: v_subrev_co_ci_u32_e32 v255, vcc_lo, 0xaf123456, v255, vcc_lo ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] # W64: v_subrev_co_ci_u32_e32 v255, vcc, 0xaf123456, v255, vcc ; encoding: [0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf] -0xff,0xfe,0xff,0x45,0x56,0x34,0x12,0xaf -# GFX12: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] 0x01,0x05,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, v1.l, v2.l ; encoding: [0x01,0x05,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] 0x7f,0x05,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, v127.l, v2.l ; encoding: [0x7f,0x05,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, v127, v2 ; encoding: [0x7f,0x05,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] 0x01,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, s1, v2.l ; encoding: [0x01,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] 0x69,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, s105, v2.l ; encoding: [0x69,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] 0x6a,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, vcc_lo, v2.l ; encoding: [0x6a,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] 0x6b,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, vcc_hi, v2.l ; encoding: [0x6b,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] 0x7b,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, ttmp15, v2.l ; encoding: [0x7b,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] 0x7d,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, m0, v2.l ; encoding: [0x7d,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] 0x7e,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, exec_lo, v2.l ; encoding: [0x7e,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] 0x7f,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, exec_hi, v2.l ; encoding: [0x7f,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] 0x7c,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, null, v2.l ; encoding: [0x7c,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] 0xc1,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, -1, v2.l ; encoding: [0xc1,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] 0xf0,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, 0.5, v2.l ; encoding: [0xf0,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] 0xfd,0x04,0x0a,0x68 +# GFX12-REAL16: v_subrev_f16_e32 v5.l, src_scc, v2.l ; encoding: [0xfd,0x04,0x0a,0x68] +# GFX12-FAKE16: v_subrev_f16_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x68] -# GFX12: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] 0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00 +# GFX12-REAL16: v_subrev_f16_e32 v127.l, 0xfe0b, v127.l ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] +# GFX12-FAKE16: v_subrev_f16_e32 v127, 0xfe0b, v127 ; encoding: [0xff,0xfe,0xfe,0x68,0x0b,0xfe,0x00,0x00] -# GFX12: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] 0x01,0x05,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] 0xff,0x05,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] 0x01,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] 0x69,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] 0x6a,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] 0x6b,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] 0x7b,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] 0x7d,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] 0x7e,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] 0x7f,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] 0x7c,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] 0xc1,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] 0xf0,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] 0xfd,0x04,0x0a,0x0a +# GFX12: v_subrev_f32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x0a] -# GFX12: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf +# GFX12: v_subrev_f32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x0b,0x56,0x34,0x12,0xaf] -# GFX12: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] 0x01,0x05,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] 0xff,0x05,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] 0x01,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] 0x69,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] 0x6a,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] 0x6b,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] 0x7b,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] 0x7d,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] 0x7e,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] 0x7f,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] 0x7c,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] 0xc1,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] 0xf0,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] 0xfd,0x04,0x0a,0x4e +# GFX12: v_subrev_nc_u32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x4e] -# GFX12: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf +# GFX12: v_subrev_nc_u32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x4f,0x56,0x34,0x12,0xaf] -# GFX12: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] 0x01,0x05,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] 0xff,0x05,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] 0x01,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] 0x69,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] 0x6a,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] 0x6b,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] 0x7b,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] 0x7d,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] 0x7e,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] 0x7f,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] 0x7c,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] 0xc1,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] 0xf0,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] 0xfd,0x04,0x0a,0x3c +# GFX12: v_xnor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3c] -# GFX12: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf +# GFX12: v_xnor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3d,0x56,0x34,0x12,0xaf] -# GFX12: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] 0x01,0x05,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, v1, v2 ; encoding: [0x01,0x05,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] 0xff,0x05,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, v255, v2 ; encoding: [0xff,0x05,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] 0x01,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, s1, v2 ; encoding: [0x01,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] 0x69,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, s105, v2 ; encoding: [0x69,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] 0x6a,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, vcc_lo, v2 ; encoding: [0x6a,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] 0x6b,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, vcc_hi, v2 ; encoding: [0x6b,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] 0x7b,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, ttmp15, v2 ; encoding: [0x7b,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] 0x7d,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, m0, v2 ; encoding: [0x7d,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] 0x7e,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, exec_lo, v2 ; encoding: [0x7e,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] 0x7f,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] 0x7c,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, null, v2 ; encoding: [0x7c,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] 0xc1,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] 0xf0,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] 0xfd,0x04,0x0a,0x3a +# GFX12: v_xor_b32_e32 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x3a] -# GFX12: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] 0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf +# GFX12: v_xor_b32_e32 v255, 0xaf123456, v255 ; encoding: [0xff,0xfe,0xff,0x3b,0x56,0x34,0x12,0xaf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt index 05c8dff02a40b4..551fb0d3111886 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp16.txt @@ -1,1696 +1,1797 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x40,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x40,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x40,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x40,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x41,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1b,0x00,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0xe4,0x00,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x40,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x41,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x01,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x0f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x11,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x1f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x21,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x2f,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x50,0x01,0xff] -# GFX12: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x5f,0x01,0x01] -# GFX12: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x64,0x01,0x60,0x01,0x13] -# GFX12: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_add_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_add_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x64,0x7f,0x6f,0xfd,0x30] -# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1b,0x00,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0xe4,0x00,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x40,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x41,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x01,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x0f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x11,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x1f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x21,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x2f,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff +# GFX12: v_add_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x50,0x01,0xff] -# GFX12: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01 +# GFX12: v_add_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x5f,0x01,0x01] -# GFX12: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13 +# GFX12: v_add_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x06,0x01,0x60,0x01,0x13] -# GFX12: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30 +# GFX12: v_add_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x07,0xff,0x6f,0xfd,0x30] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1b,0x00,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0xe4,0x00,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x40,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x41,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x01,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x0f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x11,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x1f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x21,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x2f,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x50,0x01,0xff] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x5f,0x01,0x01] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4a,0x01,0x60,0x01,0x13] -# GFX12: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4b,0xff,0x6f,0x0d,0x30] -# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1b,0x00,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0xe4,0x00,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x40,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x41,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x01,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x0f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x11,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x1f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x21,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x2f,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff +# GFX12: v_and_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x50,0x01,0xff] -# GFX12: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01 +# GFX12: v_and_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x5f,0x01,0x01] -# GFX12: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13 +# GFX12: v_and_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x36,0x01,0x60,0x01,0x13] -# GFX12: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30 +# GFX12: v_and_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x37,0xff,0x6f,0x0d,0x30] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1b,0x00,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0xe4,0x00,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x40,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x41,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x01,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x0f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x11,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x1f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x21,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x2f,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x50,0x01,0xff] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x5f,0x01,0x01] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x34,0x01,0x60,0x01,0x13] -# GFX12: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30 +# GFX12: v_ashrrev_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x35,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x02,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x02,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x02,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x02,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1b,0x00,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0xe4,0x00,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x40,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x41,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x01,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x0f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x11,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x1f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x21,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x2f,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x50,0x01,0xff] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x5f,0x01,0x01] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x5e,0x01,0x60,0x01,0x13] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x5f,0xff,0x6f,0xfd,0x30] -# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1b,0x00,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0xe4,0x00,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x40,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x41,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x01,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x0f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x11,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x1f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x21,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x2f,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x50,0x01,0xff] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01 +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x5f,0x01,0x01] -# GFX12: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13 +# GFX12: v_fmac_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6c,0x01,0x60,0x01,0x13] -# GFX12: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30 +# GFX12: v_fmac_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6c,0x7f,0x6f,0xfd,0x30] -# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1b,0x00,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0xe4,0x00,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x40,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x41,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x01,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x0f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x11,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x1f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x21,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x2f,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x50,0x01,0xff] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01 +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x5f,0x01,0x01] -# GFX12: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13 +# GFX12: v_fmac_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x56,0x01,0x60,0x01,0x13] -# GFX12: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30 +# GFX12: v_fmac_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x57,0xff,0x6f,0xfd,0x30] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1b,0x00,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0xe4,0x00,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x40,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x41,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x01,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x0f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x11,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x1f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x21,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x2f,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x50,0x01,0xff] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x5f,0x01,0x01] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x76,0x01,0x60,0x01,0x13] -# GFX12: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] 0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30 +# GFX12-REAL16: v_ldexp_f16_dpp v127.l, -|v127.l|, v127.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] +# GFX12-FAKE16: v_ldexp_f16_dpp v127, -|v127|, v127 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x76,0x7f,0x6f,0x3d,0x30] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1b,0x00,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0xe4,0x00,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x40,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x41,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x01,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x0f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x11,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x1f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x21,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x2f,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x50,0x01,0xff] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x5f,0x01,0x01] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x30,0x01,0x60,0x01,0x13] -# GFX12: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshlrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x31,0xff,0x6f,0x0d,0x30] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1b,0x00,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0xe4,0x00,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x40,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x41,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x01,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x0f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x11,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x1f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x21,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x2f,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x50,0x01,0xff] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x5f,0x01,0x01] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x32,0x01,0x60,0x01,0x13] -# GFX12: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshrrev_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x33,0xff,0x6f,0x0d,0x30] -# GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1b,0x00,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0xe4,0x00,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x40,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x41,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x01,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x0f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x11,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x1f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x21,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x2f,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x50,0x01,0xff] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x5f,0x01,0x01] -# GFX12: v_max_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x62,0x01,0x60,0x01,0x13] -# GFX12: v_max_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_max_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_max_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x62,0x7f,0x6f,0xfd,0x30] -# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1b,0x00,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0xe4,0x00,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x40,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x41,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x01,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x0f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x11,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x1f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x21,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x2f,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x50,0x01,0xff] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01 +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x5f,0x01,0x01] -# GFX12: v_max_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13 +# GFX12: v_max_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2c,0x01,0x60,0x01,0x13] -# GFX12: v_max_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30 +# GFX12: v_max_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2d,0xff,0x6f,0xfd,0x30] -# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1b,0x00,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0xe4,0x00,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x40,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x41,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x01,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x0f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x11,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x1f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x21,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x2f,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff +# GFX12: v_max_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x50,0x01,0xff] -# GFX12: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01 +# GFX12: v_max_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x5f,0x01,0x01] -# GFX12: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13 +# GFX12: v_max_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x24,0x01,0x60,0x01,0x13] -# GFX12: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x25,0xff,0x6f,0x0d,0x30] -# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1b,0x00,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0xe4,0x00,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x40,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x41,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x01,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x0f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x11,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x1f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x21,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x2f,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff +# GFX12: v_max_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x50,0x01,0xff] -# GFX12: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01 +# GFX12: v_max_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x5f,0x01,0x01] -# GFX12: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13 +# GFX12: v_max_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x28,0x01,0x60,0x01,0x13] -# GFX12: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x29,0xff,0x6f,0x0d,0x30] -# GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1b,0x00,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0xe4,0x00,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x40,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x41,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x01,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x0f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x11,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x1f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x21,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x2f,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x50,0x01,0xff] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x5f,0x01,0x01] -# GFX12: v_min_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x60,0x01,0x60,0x01,0x13] -# GFX12: v_min_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_min_num_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_min_num_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x60,0x7f,0x6f,0xfd,0x30] -# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1b,0x00,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0xe4,0x00,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x40,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x41,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x01,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x0f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x11,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x1f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x21,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x2f,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x50,0x01,0xff] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01 +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x5f,0x01,0x01] -# GFX12: v_min_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13 +# GFX12: v_min_num_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x2a,0x01,0x60,0x01,0x13] -# GFX12: v_min_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30 +# GFX12: v_min_num_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x2b,0xff,0x6f,0xfd,0x30] -# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1b,0x00,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0xe4,0x00,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x40,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x41,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x01,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x0f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x11,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x1f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x21,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x2f,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff +# GFX12: v_min_i32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x50,0x01,0xff] -# GFX12: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01 +# GFX12: v_min_i32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x5f,0x01,0x01] -# GFX12: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13 +# GFX12: v_min_i32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x22,0x01,0x60,0x01,0x13] -# GFX12: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_i32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x23,0xff,0x6f,0x0d,0x30] -# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1b,0x00,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0xe4,0x00,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x40,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x41,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x01,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x0f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x11,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x1f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x21,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x2f,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff +# GFX12: v_min_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x50,0x01,0xff] -# GFX12: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01 +# GFX12: v_min_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x5f,0x01,0x01] -# GFX12: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13 +# GFX12: v_min_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x26,0x01,0x60,0x01,0x13] -# GFX12: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x27,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x40,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x41,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x01,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x11,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x21,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x50,0x01,0xff] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0e,0x01,0x60,0x01,0x13] -# GFX12: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30 +# GFX12: v_mul_dx9_zero_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0f,0xff,0x6f,0xfd,0x30] -# GFX12: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x40,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x41,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x01,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x11,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x21,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x50,0x01,0xff] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x6a,0x01,0x60,0x01,0x13] -# GFX12: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_mul_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_mul_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x6a,0x7f,0x6f,0xfd,0x30] -# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x40,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x41,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x01,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x11,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x21,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff +# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x50,0x01,0xff] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13 +# GFX12: v_mul_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x10,0x01,0x60,0x01,0x13] -# GFX12: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30 +# GFX12: v_mul_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x11,0xff,0x6f,0xfd,0x30] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x40,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x41,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x01,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x11,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x21,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x50,0x01,0xff] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x14,0x01,0x60,0x01,0x13] -# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x15,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x40,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x41,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x01,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x11,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x21,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x50,0x01,0xff] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x18,0x01,0x60,0x01,0x13] -# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x19,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x40,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x41,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x01,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x11,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x21,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x50,0x01,0xff] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x12,0x01,0x60,0x01,0x13] -# GFX12: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_i32_i24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x13,0xff,0x6f,0x0d,0x30] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1b,0x00,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0xe4,0x00,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x40,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x41,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x01,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x0f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x11,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x1f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x21,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x2f,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x50,0x01,0xff] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x5f,0x01,0x01] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x16,0x01,0x60,0x01,0x13] -# GFX12: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_u32_u24_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x17,0xff,0x6f,0x0d,0x30] -# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1b,0x00,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0xe4,0x00,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x40,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x41,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x01,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x0f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x11,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x1f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x21,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x2f,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff +# GFX12: v_or_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x50,0x01,0xff] -# GFX12: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01 +# GFX12: v_or_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x5f,0x01,0x01] -# GFX12: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13 +# GFX12: v_or_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x38,0x01,0x60,0x01,0x13] -# GFX12: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30 +# GFX12: v_or_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x39,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x42,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x42,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x42,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x42,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x43,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x40,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x41,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x01,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x11,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x21,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x50,0x01,0xff] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x66,0x01,0x60,0x01,0x13] -# GFX12: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_sub_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_sub_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x66,0x7f,0x6f,0xfd,0x30] -# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x40,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x41,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x01,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x11,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x21,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff +# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x50,0x01,0xff] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13 +# GFX12: v_sub_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x08,0x01,0x60,0x01,0x13] -# GFX12: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30 +# GFX12: v_sub_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x09,0xff,0x6f,0xfd,0x30] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1b,0x00,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0xe4,0x00,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x40,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x41,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x01,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x0f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x11,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x1f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x21,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x2f,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x50,0x01,0xff] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x5f,0x01,0x01] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4c,0x01,0x60,0x01,0x13] -# GFX12: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4d,0xff,0x6f,0x0d,0x30] +0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1b,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff] -0xfa,0x04,0x0a,0x44,0x01,0xe4,0x00,0xff +0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x40,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x41,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x01,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x0f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x11,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x1f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x21,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x2f,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff] -0xfa,0x04,0x0a,0x44,0x01,0x50,0x01,0xff +0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01] -0xfa,0x04,0x0a,0x44,0x01,0x5f,0x01,0x01 +0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13] -0xfa,0x04,0x0a,0x44,0x01,0x60,0x01,0x13 +0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30] -0xfa,0xfe,0xff,0x45,0xff,0x6f,0x0d,0x30 -# GFX12: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x5f,0x01,0x01] -# GFX12: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x68,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] 0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30 +# GFX12-REAL16: v_subrev_f16_dpp v127.l, -|v127.l|, -|v127.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] +# GFX12-FAKE16: v_subrev_f16_dpp v127, -|v127|, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x68,0x7f,0x6f,0xfd,0x30] -# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01 +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x5f,0x01,0x01] -# GFX12: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13 +# GFX12: v_subrev_f32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x0a,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] 0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30 +# GFX12: v_subrev_f32_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x0b,0xff,0x6f,0xfd,0x30] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1b,0x00,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0xe4,0x00,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x40,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x41,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x01,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x0f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x11,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x1f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x21,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x2f,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x50,0x01,0xff] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x5f,0x01,0x01] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x4e,0x01,0x60,0x01,0x13] -# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30 +# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x4f,0xff,0x6f,0x0d,0x30] -# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1b,0x00,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0xe4,0x00,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x40,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x41,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x01,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x0f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x11,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x1f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x21,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x2f,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x50,0x01,0xff] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01 +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x5f,0x01,0x01] -# GFX12: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13 +# GFX12: v_xnor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3c,0x01,0x60,0x01,0x13] -# GFX12: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30 +# GFX12: v_xnor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3d,0xff,0x6f,0x0d,0x30] -# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1b,0x00,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0xe4,0x00,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x40,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x41,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x01,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x0f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x11,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x1f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x21,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x2f,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] 0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff +# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x50,0x01,0xff] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] 0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01 +# GFX12: v_xor_b32_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x5f,0x01,0x01] -# GFX12: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] 0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13 +# GFX12: v_xor_b32_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0xfa,0x04,0x0a,0x3a,0x01,0x60,0x01,0x13] -# GFX12: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] 0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30 +# GFX12: v_xor_b32_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xfa,0xfe,0xff,0x3b,0xff,0x6f,0x0d,0x30] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt index 2e33df35af1f36..bbf494c153fd3a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2_dpp8.txt @@ -1,244 +1,261 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64 %s +; NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-REAL16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX12-FAKE16 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX12-FAKE16 %s +0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 # W32: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] # W64: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x40,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 # W32: v_add_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] # W64: v_add_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x41,0xff,0x00,0x00,0x00 -# GFX12: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_add_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_add_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x64,0x01,0x77,0x39,0x05] -# GFX12: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_add_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_add_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x64,0x7f,0x00,0x00,0x00] -# GFX12: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05 +# GFX12: v_add_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x06,0x01,0x77,0x39,0x05] -# GFX12: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00 +# GFX12: v_add_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x07,0xff,0x00,0x00,0x00] -# GFX12: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05 +# GFX12: v_add_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4a,0x01,0x77,0x39,0x05] -# GFX12: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00 +# GFX12: v_add_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4b,0xff,0x00,0x00,0x00] -# GFX12: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05 +# GFX12: v_and_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x36,0x01,0x77,0x39,0x05] -# GFX12: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00 +# GFX12: v_and_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x37,0xff,0x00,0x00,0x00] -# GFX12: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05 +# GFX12: v_ashrrev_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x34,0x01,0x77,0x39,0x05] -# GFX12: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00 +# GFX12: v_ashrrev_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x35,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 # W32: v_cndmask_b32_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_dpp v5, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x02,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 # W32: v_cndmask_b32_dpp v255, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] # W64: v_cndmask_b32_dpp v255, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x03,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x5e,0x01,0x77,0x39,0x05] -# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00 +# GFX12: v_cvt_pk_rtz_f16_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x5f,0xff,0x00,0x00,0x00] -# GFX12: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05 +# GFX12: v_fmac_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6c,0x01,0x77,0x39,0x05] -# GFX12: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00 +# GFX12: v_fmac_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6c,0x7f,0x00,0x00,0x00] -# GFX12: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05 +# GFX12: v_fmac_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x56,0x01,0x77,0x39,0x05] -# GFX12: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00 +# GFX12: v_fmac_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x57,0xff,0x00,0x00,0x00] -# GFX12: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_ldexp_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_ldexp_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x76,0x01,0x77,0x39,0x05] -# GFX12: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_ldexp_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_ldexp_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x76,0x7f,0x00,0x00,0x00] -# GFX12: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05 +# GFX12: v_lshlrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x30,0x01,0x77,0x39,0x05] -# GFX12: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00 +# GFX12: v_lshlrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x31,0xff,0x00,0x00,0x00] -# GFX12: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05 +# GFX12: v_lshrrev_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x32,0x01,0x77,0x39,0x05] -# GFX12: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00 +# GFX12: v_lshrrev_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x33,0xff,0x00,0x00,0x00] -# GFX12: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_max_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_max_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x62,0x01,0x77,0x39,0x05] -# GFX12: v_max_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_max_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_max_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x62,0x7f,0x00,0x00,0x00] -# GFX12: v_max_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2c,0x01,0x77,0x39,0x05] -# GFX12: v_max_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00 +# GFX12: v_max_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2d,0xff,0x00,0x00,0x00] -# GFX12: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05 +# GFX12: v_max_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x24,0x01,0x77,0x39,0x05] -# GFX12: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00 +# GFX12: v_max_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x25,0xff,0x00,0x00,0x00] -# GFX12: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05 +# GFX12: v_max_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x28,0x01,0x77,0x39,0x05] -# GFX12: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00 +# GFX12: v_max_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x29,0xff,0x00,0x00,0x00] -# GFX12: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_min_num_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_min_num_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x60,0x01,0x77,0x39,0x05] -# GFX12: v_min_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_min_num_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_min_num_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x60,0x7f,0x00,0x00,0x00] -# GFX12: v_min_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x2a,0x01,0x77,0x39,0x05] -# GFX12: v_min_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00 +# GFX12: v_min_num_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x2b,0xff,0x00,0x00,0x00] -# GFX12: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05 +# GFX12: v_min_i32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x22,0x01,0x77,0x39,0x05] -# GFX12: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00 +# GFX12: v_min_i32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x23,0xff,0x00,0x00,0x00] -# GFX12: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05 +# GFX12: v_min_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x26,0x01,0x77,0x39,0x05] -# GFX12: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00 +# GFX12: v_min_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x27,0xff,0x00,0x00,0x00] -# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05 +# GFX12: v_mul_dx9_zero_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0e,0x01,0x77,0x39,0x05] -# GFX12: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00 +# GFX12: v_mul_dx9_zero_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0f,0xff,0x00,0x00,0x00] -# GFX12: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_mul_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_mul_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x6a,0x01,0x77,0x39,0x05] -# GFX12: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_mul_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_mul_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x6a,0x7f,0x00,0x00,0x00] -# GFX12: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x10,0x01,0x77,0x39,0x05] -# GFX12: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00 +# GFX12: v_mul_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x11,0xff,0x00,0x00,0x00] -# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x14,0x01,0x77,0x39,0x05] -# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00 +# GFX12: v_mul_hi_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x15,0xff,0x00,0x00,0x00] -# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x18,0x01,0x77,0x39,0x05] -# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00 +# GFX12: v_mul_hi_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x19,0xff,0x00,0x00,0x00] -# GFX12: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05 +# GFX12: v_mul_i32_i24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x12,0x01,0x77,0x39,0x05] -# GFX12: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00 +# GFX12: v_mul_i32_i24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x13,0xff,0x00,0x00,0x00] -# GFX12: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05 +# GFX12: v_mul_u32_u24_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x16,0x01,0x77,0x39,0x05] -# GFX12: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00 +# GFX12: v_mul_u32_u24_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x17,0xff,0x00,0x00,0x00] -# GFX12: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05 +# GFX12: v_or_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x38,0x01,0x77,0x39,0x05] -# GFX12: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00 +# GFX12: v_or_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x39,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 # W32: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] # W64: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x42,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 # W32: v_sub_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] # W64: v_sub_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x43,0xff,0x00,0x00,0x00 -# GFX12: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_sub_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_sub_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x66,0x01,0x77,0x39,0x05] -# GFX12: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_sub_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_sub_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x66,0x7f,0x00,0x00,0x00] -# GFX12: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x08,0x01,0x77,0x39,0x05] -# GFX12: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00 +# GFX12: v_sub_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x09,0xff,0x00,0x00,0x00] -# GFX12: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05 +# GFX12: v_sub_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4c,0x01,0x77,0x39,0x05] -# GFX12: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00 +# GFX12: v_sub_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4d,0xff,0x00,0x00,0x00] +0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 # W32: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] # W64: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05] -0xe9,0x04,0x0a,0x44,0x01,0x77,0x39,0x05 +0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 # W32: v_subrev_co_ci_u32_dpp v255, vcc_lo, v255, v255, vcc_lo dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] # W64: v_subrev_co_ci_u32_dpp v255, vcc, v255, v255, vcc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00] -0xea,0xfe,0xff,0x45,0xff,0x00,0x00,0x00 -# GFX12: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05 +# GFX12-REAL16: v_subrev_f16_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] +# GFX12-FAKE16: v_subrev_f16_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x68,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] 0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00 +# GFX12-REAL16: v_subrev_f16_dpp v127.l, v127.l, v127.l dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] +# GFX12-FAKE16: v_subrev_f16_dpp v127, v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xfe,0x68,0x7f,0x00,0x00,0x00] -# GFX12: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x0a,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00 +# GFX12: v_subrev_f32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x0b,0xff,0x00,0x00,0x00] -# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_nc_u32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x4e,0x01,0x77,0x39,0x05] -# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00 +# GFX12: v_subrev_nc_u32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x4f,0xff,0x00,0x00,0x00] -# GFX12: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05 +# GFX12: v_xnor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3c,0x01,0x77,0x39,0x05] -# GFX12: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00 +# GFX12: v_xnor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3d,0xff,0x00,0x00,0x00] -# GFX12: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] 0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05 +# GFX12: v_xor_b32_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0x04,0x0a,0x3a,0x01,0x77,0x39,0x05] -# GFX12: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] 0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00 +# GFX12: v_xor_b32_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xea,0xfe,0xff,0x3b,0xff,0x00,0x00,0x00] diff --git a/llvm/test/TableGen/64-bit-int.td b/llvm/test/TableGen/64-bit-int.td index 2d2bdb8b560e2c..d2a2999c14e991 100644 --- a/llvm/test/TableGen/64-bit-int.td +++ b/llvm/test/TableGen/64-bit-int.td @@ -16,7 +16,7 @@ def { #ifdef OOR3 bits<64> Val = 0x10000000000000000; #endif -// CHECK-OOR: error: Number out of range +// CHECK-OOR: error: number out of range bits<64> BinVal0 = 0x8000000000000000; bits<64> HexVal0 = 0b1000000000000000000000000000000000000000000000000000000000000000; diff --git a/llvm/test/TableGen/invalid-macro-name-command-line.td b/llvm/test/TableGen/invalid-macro-name-command-line.td index 0d2307997ebe54..7d19e8996639af 100644 --- a/llvm/test/TableGen/invalid-macro-name-command-line.td +++ b/llvm/test/TableGen/invalid-macro-name-command-line.td @@ -3,7 +3,7 @@ // RUN: not llvm-tblgen %s -D_MAC# 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-3 // RUN: not llvm-tblgen %s -D 2>&1 | FileCheck %s --check-prefix=CHECK-TEST-4 -// CHECK-TEST-1: error: Invalid macro name `MACRO=1` specified on command line -// CHECK-TEST-2: error: Invalid macro name `0MAC` specified on command line -// CHECK-TEST-3: error: Invalid macro name `_MAC#` specified on command line +// CHECK-TEST-1: error: invalid macro name `MACRO=1` specified on command line +// CHECK-TEST-2: error: invalid macro name `0MAC` specified on command line +// CHECK-TEST-3: error: invalid macro name `_MAC#` specified on command line // CHECK-TEST-4: for the -D option: requires a value! diff --git a/llvm/test/TableGen/prep-diag1.td b/llvm/test/TableGen/prep-diag1.td index 41b7d477c6942e..27f428f4fe9598 100644 --- a/llvm/test/TableGen/prep-diag1.td +++ b/llvm/test/TableGen/prep-diag1.td @@ -4,22 +4,22 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG3 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #define NAME +// DIAG1: error: only comments are supported after #define NAME #define ENABLED1/* */class C; #endif // DIAG1 #ifdef DIAG4 -// DIAG4: warning: Duplicate definition of macro: ENABLED1 +// DIAG4: warning: duplicate definition of macro: ENABLED1 #define ENABLED1 #define ENABLED1 #endif // DIAG4 #ifdef DIAG2 -// DIAG2: error: Only comments are supported after #ifdef NAME +// DIAG2: error: only comments are supported after #ifdef NAME // Invalid #ifdef below should be detected even if DIAG2 is not defined. -// DIAG3: error: Only comments are supported after #ifdef NAME +// DIAG3: error: only comments are supported after #ifdef NAME #ifdef DIAG2/* */class C; #endif diff --git a/llvm/test/TableGen/prep-diag10.td b/llvm/test/TableGen/prep-diag10.td index eb387a07b066ca..cfcbab094ad73b 100644 --- a/llvm/test/TableGen/prep-diag10.td +++ b/llvm/test/TableGen/prep-diag10.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else diff --git a/llvm/test/TableGen/prep-diag11.td b/llvm/test/TableGen/prep-diag11.td index 0042bc04f9e101..1fe8a8503076e5 100644 --- a/llvm/test/TableGen/prep-diag11.td +++ b/llvm/test/TableGen/prep-diag11.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else #define ENABLED diff --git a/llvm/test/TableGen/prep-diag12.td b/llvm/test/TableGen/prep-diag12.td index c26301ee17ac2b..02ffa672b2fa05 100644 --- a/llvm/test/TableGen/prep-diag12.td +++ b/llvm/test/TableGen/prep-diag12.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED #else #define ENABLED diff --git a/llvm/test/TableGen/prep-diag13.td b/llvm/test/TableGen/prep-diag13.td index aa3fdab4802d37..733a46a1618131 100644 --- a/llvm/test/TableGen/prep-diag13.td +++ b/llvm/test/TableGen/prep-diag13.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED /* #else diff --git a/llvm/test/TableGen/prep-diag14.td b/llvm/test/TableGen/prep-diag14.td index cae9bc3b7f5b6c..a3216ee4f47125 100644 --- a/llvm/test/TableGen/prep-diag14.td +++ b/llvm/test/TableGen/prep-diag14.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED // #endif diff --git a/llvm/test/TableGen/prep-diag2.td b/llvm/test/TableGen/prep-diag2.td index 741026b9c8a2d6..e51490600ff64f 100644 --- a/llvm/test/TableGen/prep-diag2.td +++ b/llvm/test/TableGen/prep-diag2.td @@ -2,10 +2,10 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #else +// DIAG1: error: only comments are supported after #else // Invalid #else below should be detected even if DIAG1 is not defined. -// DIAG2: error: Only comments are supported after #else +// DIAG2: error: only comments are supported after #else #ifdef DIAG2//DIAG2 #else/* */class C; diff --git a/llvm/test/TableGen/prep-diag3.td b/llvm/test/TableGen/prep-diag3.td index fbedfa290b9947..0b4d40307b40b0 100644 --- a/llvm/test/TableGen/prep-diag3.td +++ b/llvm/test/TableGen/prep-diag3.td @@ -2,10 +2,10 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck --check-prefixes=DIAG2 %s #ifdef DIAG1 -// DIAG1: error: Only comments are supported after #endif +// DIAG1: error: only comments are supported after #endif // Invalid #else below should be detected even if DIAG1 is not defined. -// DIAG2: error: Only comments are supported after #endif +// DIAG2: error: only comments are supported after #endif #ifdef DIAG2//DIAG2 #else/*!DIAG2*/ #endif/* !DIAG2 diff --git a/llvm/test/TableGen/prep-diag4.td b/llvm/test/TableGen/prep-diag4.td index 4661ef8667d23f..ead116ebde0de8 100644 --- a/llvm/test/TableGen/prep-diag4.td +++ b/llvm/test/TableGen/prep-diag4.td @@ -1,7 +1,7 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s // CHECK: error: double #else -// CHECK: error: Previous #else is here +// CHECK: error: previous #else is here #ifdef DIAG1 #else #else diff --git a/llvm/test/TableGen/prep-diag6.td b/llvm/test/TableGen/prep-diag6.td index f4202d115da59a..bf1cd3d3490b5e 100644 --- a/llvm/test/TableGen/prep-diag6.td +++ b/llvm/test/TableGen/prep-diag6.td @@ -1,6 +1,6 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #ifdef +// CHECK: error: expected macro name after #ifdef #ifdef #else #else diff --git a/llvm/test/TableGen/prep-diag8.td b/llvm/test/TableGen/prep-diag8.td index 7a7bde62c79c4e..82797d6cf4a62d 100644 --- a/llvm/test/TableGen/prep-diag8.td +++ b/llvm/test/TableGen/prep-diag8.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #define +// CHECK: error: expected macro name after #define #define #endif diff --git a/llvm/test/TableGen/prep-diag9.td b/llvm/test/TableGen/prep-diag9.td index 4ecff575cdc7bb..6ad208104301bc 100644 --- a/llvm/test/TableGen/prep-diag9.td +++ b/llvm/test/TableGen/prep-diag9.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Reached EOF without matching #endif -// CHECK: error: The latest preprocessor control is here +// CHECK: error: reached EOF without matching #endif +// CHECK: error: the latest preprocessor control is here #ifdef DISABLED diff --git a/llvm/test/TableGen/prep-ifndef-diag-1.td b/llvm/test/TableGen/prep-ifndef-diag-1.td index 941f2d377a98a7..4a0d0754ed7906 100644 --- a/llvm/test/TableGen/prep-ifndef-diag-1.td +++ b/llvm/test/TableGen/prep-ifndef-diag-1.td @@ -1,4 +1,4 @@ // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -// CHECK: error: Expected macro name after #ifndef +// CHECK: error: expected macro name after #ifndef #ifndef 1 diff --git a/llvm/test/TableGen/prep-ifndef-diag-2.td b/llvm/test/TableGen/prep-ifndef-diag-2.td index 7b5f9dfd24b786..c89cbab08e5c5c 100644 --- a/llvm/test/TableGen/prep-ifndef-diag-2.td +++ b/llvm/test/TableGen/prep-ifndef-diag-2.td @@ -1,4 +1,4 @@ // RUN: not llvm-tblgen %s 2>&1 | FileCheck %s -// CHECK: error: Only comments are supported after #ifndef NAME +// CHECK: error: only comments are supported after #ifndef NAME #ifndef MACRO 42 diff --git a/llvm/test/TableGen/unterminated-c-comment.td b/llvm/test/TableGen/unterminated-c-comment.td index 0f4cd9d633c66d..b5b995342be744 100644 --- a/llvm/test/TableGen/unterminated-c-comment.td +++ b/llvm/test/TableGen/unterminated-c-comment.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Unterminated comment! +// CHECK: error: unterminated comment include "unterminated-c-comment-include.inc" */ diff --git a/llvm/test/TableGen/unterminated-code-block.td b/llvm/test/TableGen/unterminated-code-block.td index d6b6f50827a672..5bd4cd7e17d827 100644 --- a/llvm/test/TableGen/unterminated-code-block.td +++ b/llvm/test/TableGen/unterminated-code-block.td @@ -1,5 +1,5 @@ // RUN: not llvm-tblgen -I %p %s 2>&1 | FileCheck %s -// CHECK: error: Unterminated code block +// CHECK: error: unterminated code block include "unterminated-code-block-include.inc" }]>; diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 43c206fa0af698..21f3c8593a710b 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -1959,8 +1959,11 @@ static const X86FoldTableEntry Table1[] = { {X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE}, {X86::VUCOMISSrr, X86::VUCOMISSrm, 0}, {X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSDZrr, X86::VUCOMXSDZrm, 0}, {X86::VUCOMXSDZrr_Int, X86::VUCOMXSDZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSHZrr, X86::VUCOMXSHZrm, 0}, {X86::VUCOMXSHZrr_Int, X86::VUCOMXSHZrm_Int, TB_NO_REVERSE}, + {X86::VUCOMXSSZrr, X86::VUCOMXSSZrm, 0}, {X86::VUCOMXSSZrr_Int, X86::VUCOMXSSZrm_Int, TB_NO_REVERSE}, {X86::XOR16ri8_ND, X86::XOR16mi8_ND, 0}, {X86::XOR16ri8_NF_ND, X86::XOR16mi8_NF_ND, 0}, diff --git a/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll new file mode 100644 index 00000000000000..6635280bc43603 --- /dev/null +++ b/llvm/test/Transforms/Attributor/nofpclass-phiselect.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -S < %s | FileCheck %s + +define float @phi_select(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_select +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_select_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) { +; CHECK-LABEL: define float @phi_select_onlybase +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_select_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_select_onlyarg +; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C]], float [[PHI]], float [[ARG]] +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[SELECT]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %select, %loop ] + %select = select i1 %c, float %phi, float %arg + br i1 %c, label %loop, label %exit + +exit: + ret float %select +} + +define float @phi_phi(i1 %c, float nofpclass(inf) %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_phi +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + +inner: + %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} + +define float @phi_phi_onlybase(i1 %c, float nofpclass(inf) %base, float %arg) { +; CHECK-LABEL: define float @phi_phi_onlybase +; CHECK-SAME: (i1 [[C:%.*]], float nofpclass(inf) [[BASE:%.*]], float [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + +inner: + %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} + +define float @phi_phi_onlyarg(i1 %c, float %base, float nofpclass(inf) %arg) { +; CHECK-LABEL: define float @phi_phi_onlyarg +; CHECK-SAME: (i1 [[C:%.*]], float [[BASE:%.*]], float nofpclass(inf) [[ARG:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[PHI:%.*]] = phi float [ [[BASE]], [[ENTRY:%.*]] ], [ [[PHI2:%.*]], [[EXIT1:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PHI2]] = phi float [ [[PHI]], [[LOOP]] ], [ [[ARG]], [[INNER]] ] +; CHECK-NEXT: br i1 [[C]], label [[INNER]], label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret float [[PHI2]] +; +entry: + br label %loop + +loop: + %phi = phi float [ %base, %entry ], [ %phi2, %exit1 ] + br label %inner + +inner: + %phi2 = phi float [ %phi, %loop ], [ %arg, %inner ] + br i1 %c, label %inner, label %exit1 + +exit1: + br i1 %c, label %loop, label %exit + +exit: + ret float %phi2 +} diff --git a/llvm/test/Transforms/Inline/convergence-inline.ll b/llvm/test/Transforms/Inline/convergence-inline.ll index 8c67e6a59b7db1..4996a2376be638 100644 --- a/llvm/test/Transforms/Inline/convergence-inline.ll +++ b/llvm/test/Transforms/Inline/convergence-inline.ll @@ -185,6 +185,30 @@ define void @test_two_calls() convergent { ret void } +define i32 @token_not_first(i32 %x) convergent alwaysinline { +; CHECK-LABEL: @token_not_first( +; CHECK-NEXT: {{%.*}} = alloca ptr, align 8 +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: [[Y:%.*]] = call i32 @g(i32 [[X:%.*]]) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret i32 [[Y]] +; + %p = alloca ptr, align 8 + %token = call token @llvm.experimental.convergence.entry() + %y = call i32 @g(i32 %x) [ "convergencectrl"(token %token) ] + ret i32 %y +} + +define void @test_token_not_first() convergent { +; CHECK-LABEL: @test_token_not_first( +; CHECK-NEXT: [[TOKEN:%.*]] = call token @llvm.experimental.convergence.entry() +; CHECK-NEXT: {{%.*}} = call i32 @g(i32 23) [ "convergencectrl"(token [[TOKEN]]) ] +; CHECK-NEXT: ret void +; + %token = call token @llvm.experimental.convergence.entry() + %x = call i32 @token_not_first(i32 23) [ "convergencectrl"(token %token) ] + ret void +} + declare void @f(i32) convergent declare i32 @g(i32) convergent diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arith-fp-frem-costs.ll similarity index 100% rename from llvm/test/Analysis/CostModel/AArch64/arith-fp-frem.ll rename to llvm/test/Transforms/LoopVectorize/AArch64/arith-fp-frem-costs.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index 0e55ad65cdb2c2..6724afd6ca10f4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -410,45 +410,49 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0) -; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 3 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP4]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 16, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]] -; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], [[TMP6]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 2 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[TMP10:%.*]] = add [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = mul [[TMP10]], shufflevector ( insertelement ( poison, i64 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP8]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP12]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> -; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[STRIDED_VEC5]] to <8 x i32> -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[STEP_ADD]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> ) -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> ) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 1 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i8( [[WIDE_VEC]]) +; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 +; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { , } [[STRIDED_VEC]], 1 +; CHECK-NEXT: [[TMP17:%.*]] = zext [[TMP16]] to +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[DST]], [[VEC_IND]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP17]], [[TMP18]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] @@ -462,9 +466,9 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s ; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32 ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 4 +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 2 ; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -481,7 +485,7 @@ loop: %ext = zext i8 %l.1 to i32 %gep.dst = getelementptr i32, ptr %dst, i64 %iv store i32 %ext, ptr %gep.dst, align 4 - %iv.next = add nsw i64 %iv, 4 + %iv.next = add nsw i64 %iv, 2 %ec = icmp slt i64 %iv, %N br i1 %ec, label %loop, label %exit diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll index fa346b4eac02d4..6477f14e3c6984 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll @@ -6,26 +6,26 @@ define void @i8_factor_2(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_2' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> -; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at , ir<%p0> +; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0 @@ -49,16 +49,16 @@ define void @i8_factor_3(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_3' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at , ir<%p0> +; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0> +; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0 @@ -86,16 +86,16 @@ define void @i8_factor_4(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_4' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at , ir<%p0> +; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0> +; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0 @@ -127,14 +127,14 @@ define void @i8_factor_5(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_5' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at , ir<%p0> +; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0> +; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 0 @@ -170,14 +170,14 @@ define void @i8_factor_6(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_6' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at , ir<%p0> +; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0> +; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 0 @@ -217,14 +217,14 @@ define void @i8_factor_7(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_7' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at , ir<%p0> +; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0> +; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 0 @@ -268,14 +268,14 @@ define void @i8_factor_8(ptr %data, i64 %n) { entry: br label %for.body ; CHECK-LABEL: Checking a loop in 'i8_factor_8' -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 2 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> -; CHECK: Cost of 9 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at , ir<%p0> +; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0> +; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at , ir<%p0> for.body: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ] %p0 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 0 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i16-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i32-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i64-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/gather-i8-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/handle-iptr-with-data-layout-to-not-assert.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/handle-iptr-with-data-layout-to-not-assert.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/handle-iptr-with-data-layout-to-not-assert.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f32-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-f64-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-half.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-half.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-half.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i16-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2-indices-0u.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-01u.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3-indices-0uu.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-012u.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-01uu.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4-indices-0uuu.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i32-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i64-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-load-i8-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f32-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-f64-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i16-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i32-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i64-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-2.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-3.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-4.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-5.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-6.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-7.ll diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/interleaved-store-i8-stride-8.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i32-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-gather-i64-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-load-i16.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-interleaved-store-i16.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-load-i16.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i16.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-load-i32.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i32.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-load-i64.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i64.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-load-i8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-load-i8.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i32-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-scatter-i64-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-store-i16.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i16.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-store-i32.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i32.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-store-i64.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i64.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/masked-store-i8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/masked-store-i8.ll diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i16-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i32-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i64-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/scatter-i8-with-i8-index.ll diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i16.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/strided-load-i16.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i16.ll diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i32.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/strided-load-i32.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i32.ll diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i64.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/strided-load-i64.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i64.ll diff --git a/llvm/test/Analysis/CostModel/X86/strided-load-i8.ll b/llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll similarity index 100% rename from llvm/test/Analysis/CostModel/X86/strided-load-i8.ll rename to llvm/test/Transforms/LoopVectorize/X86/CostModel/strided-load-i8.ll diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll index e1457ca7251ed8..205eeb8878989d 100644 --- a/llvm/test/Transforms/PGOProfile/memprof.ll +++ b/llvm/test/Transforms/PGOProfile/memprof.ll @@ -66,6 +66,18 @@ ;; Check that the total sizes are reported if requested. ; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -memprof-report-hinted-sizes 2>&1 | FileCheck %s --check-prefixes=TOTALSIZES +;; Make sure we emit a random hotness seed if requested. +; RUN: llvm-profdata merge -memprof-random-hotness %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand 2>&1 | FileCheck %s --check-prefix=RAND +; RAND: random hotness seed = +;; Can't check the exact values, but make sure applying the random profile +;; succeeds with the same stats +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=ALL,MEMPROFONLY,MEMPROFSTATS + +;; Make sure we use a specific random hotness seed if requested. +; RUN: llvm-profdata merge -memprof-random-hotness -memprof-random-hotness-seed=1730170724 %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdatarand2 2>&1 | FileCheck %s --check-prefix=RAND2 +; RAND2: random hotness seed = 1730170724 +; RUN: opt < %s -passes='memprof-use' -pgo-warn-missing-function -S -stats 2>&1 | FileCheck %s --check-prefixes=MEMPROFRAND2,ALL,MEMPROFONLY,MEMPROFSTATS + ; MEMPROFMATCHINFO: MemProf notcold context with id 1093248920606587996 has total profiled size 10 is matched ; MEMPROFMATCHINFO: MemProf notcold context with id 5725971306423925017 has total profiled size 10 is matched ; MEMPROFMATCHINFO: MemProf notcold context with id 6792096022461663180 has total profiled size 10 is matched @@ -372,6 +384,13 @@ for.end: ; preds = %for.cond ; MEMPROFNOCOLINFO: ![[C10]] = !{i64 -4535090212904553409} ; MEMPROFNOCOLINFO: ![[C11]] = !{i64 3577763375057267810} +;; For the specific random seed, this is the expected order of hotness +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"cold" +; MEMPROFRAND2: !"hot" +; MEMPROFRAND2: !"hot" + ; MEMPROFSTATS: 8 memprof - Number of alloc contexts in memory profile. ; MEMPROFSTATS: 10 memprof - Number of callsites in memory profile. ; MEMPROFSTATS: 6 memprof - Number of functions having valid memory profile. diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll new file mode 100644 index 00000000000000..4a024cc4c0309c --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 + +define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) { +; SSE-LABEL: @PR50392( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2 +; SSE-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3 +; SSE-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]] +; SSE-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3 +; SSE-NEXT: ret <4 x double> [[SHUFFLE]] +; +; AVX1-LABEL: @PR50392( +; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX1-NEXT: [[VECEXT10:%.*]] = extractelement <4 x double> [[B]], i64 2 +; AVX1-NEXT: [[VECEXT11:%.*]] = extractelement <4 x double> [[B]], i64 3 +; AVX1-NEXT: [[ADD12:%.*]] = fadd double [[VECEXT10]], [[VECEXT11]] +; AVX1-NEXT: [[SHUFFLE:%.*]] = insertelement <4 x double> [[TMP4]], double [[ADD12]], i64 3 +; AVX1-NEXT: ret <4 x double> [[SHUFFLE]] +; +; AVX2-LABEL: @PR50392( +; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]] +; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> +; AVX2-NEXT: ret <4 x double> [[SHUFFLE]] +; + %vecext = extractelement <4 x double> %a, i32 0 + %vecext1 = extractelement <4 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <4 x double> poison, double %add, i32 0 + %vecext2 = extractelement <4 x double> %a, i32 2 + %vecext3 = extractelement <4 x double> %a, i32 3 + %add4 = fadd double %vecext2, %vecext3 + %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 + %vecext6 = extractelement <4 x double> %b, i32 0 + %vecext7 = extractelement <4 x double> %b, i32 1 + %add8 = fadd double %vecext6, %vecext7 + %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 + %vecext10 = extractelement <4 x double> %b, i32 2 + %vecext11 = extractelement <4 x double> %b, i32 3 + %add12 = fadd double %vecext10, %vecext11 + %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 + %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> + ret <4 x double> %shuffle +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll new file mode 100644 index 00000000000000..1d4cee45b66856 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -O3 -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -O3 -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v2 -passes="default" -S < %s | FileCheck %s --check-prefixes=SSE +; RUN: opt -mtriple=x86_64-- -mcpu=btver2 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2 + +define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) { +; SSE-LABEL: @PR94546( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; SSE-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; SSE-NEXT: ret <4 x double> [[TMP4]] +; +; AVX-LABEL: @PR94546( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> +; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]] +; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> +; AVX-NEXT: ret <4 x double> [[TMP4]] +; + %vecext = extractelement <4 x double> %a, i32 0 + %vecext1 = extractelement <4 x double> %a, i32 1 + %add = fadd double %vecext, %vecext1 + %vecinit = insertelement <4 x double> poison, double %add, i32 0 + %vecext2 = extractelement <4 x double> %a, i32 2 + %vecext3 = extractelement <4 x double> %a, i32 3 + %add4 = fadd double %vecext2, %vecext3 + %vecinit5 = insertelement <4 x double> %vecinit, double %add4, i32 1 + %vecext6 = extractelement <4 x double> %b, i32 0 + %vecext7 = extractelement <4 x double> %b, i32 1 + %add8 = fadd double %vecext6, %vecext7 + %vecinit9 = insertelement <4 x double> %vecinit5, double %add8, i32 2 + %vecext10 = extractelement <4 x double> %b, i32 2 + %vecext11 = extractelement <4 x double> %b, i32 3 + %add12 = fadd double %vecext10, %vecext11 + %vecinit13 = insertelement <4 x double> %vecinit9, double %add12, i32 3 + %shuffle = shufflevector <4 x double> %vecinit13, <4 x double> %a, <4 x i32> + ret <4 x double> %shuffle +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX1: {{.*}} +; AVX2: {{.*}} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll index bcea147d724f53..f23043f0c47f4a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll @@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) { ; ; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16( ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { -; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 -; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 -; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4 -; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half> -; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4 -; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half> -; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2 -; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2 +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> +; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 ; CHECK-F16C-NEXT: ret void ; ; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16( diff --git a/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll new file mode 100644 index 00000000000000..e94868c7b9e5b3 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX + +; Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)" + +declare void @use_v4f64(<4 x double>) + +define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: define <4 x double> @fadd_v4f64( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[POST1]] +; + %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> + %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> + %op = fadd <4 x double> %a1, %b1 + %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> + ret <4 x double> %post +} + +define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: define <4 x double> @fadd_v4f64_poison_idx( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[POST1]] +; + %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> + %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> + %op = fadd <4 x double> %a1, %b1 + %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> + ret <4 x double> %post +} + +define <4 x double> @fadd_mixed_types(<4 x double> %a, <2 x double> %b) { +; CHECK-LABEL: define <4 x double> @fadd_mixed_types( +; CHECK-SAME: <4 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> +; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: ret <4 x double> [[POST1]] +; + %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> + %b1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> + %op = fadd <4 x double> %a1, %b1 + %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> + ret <4 x double> %post +} + +; Negative test - multiple use of fadd +define <4 x double> @fadd_v4f64_multiuse_op(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: define <4 x double> @fadd_v4f64_multiuse_op( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]] +; CHECK-NEXT: [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: call void @use_v4f64(<4 x double> [[OP]]) +; CHECK-NEXT: ret <4 x double> [[POST]] +; + %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> + %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> + %op = fadd <4 x double> %a1, %b1 + %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> + call void @use_v4f64(<4 x double> %op) + ret <4 x double> %post +} + +; Negative test - multiple use of inner shuffle +define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle( +; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]] +; CHECK-NEXT: [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> +; CHECK-NEXT: call void @use_v4f64(<4 x double> [[A1]]) +; CHECK-NEXT: ret <4 x double> [[POST]] +; + %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> + %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> + %op = fadd <4 x double> %a1, %b1 + %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> + call void @use_v4f64(<4 x double> %a1) + ret <4 x double> %post +} + +define <4 x i32> @sdiv_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: define <4 x i32> @sdiv_v4i32( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[POST:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x i32> [[POST]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[POST1]] +; + %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> + %op = sdiv <4 x i32> %a1, %b1 + %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> + ret <4 x i32> %post +} + +; Negative test - don't introduce poison element into div/rem instruction +define <4 x i32> @sdiv_v4i32_poison(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]] +; CHECK-NEXT: [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[POST]] +; + %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> + %op = sdiv <4 x i32> %a1, %b1 + %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> + ret <4 x i32> %post +} + +; Negative test - don't introduce poison element into div/rem instruction +define <4 x i32> @sdiv_v4i32_poison_idx(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison_idx( +; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]] +; CHECK-NEXT: [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[POST]] +; + %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> + %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> + %op = sdiv <4 x i32> %a1, %b1 + %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> + ret <4 x i32> %post +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s index 489bd1801d864a..76f8e7880d836e 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s @@ -1,3 +1,6 @@ -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s +// RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECKA %s +// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck --check-prefixes=CHECKB %s v_bfrev_b32 v5, v299 + +v_bfrev_b32 v5, v1 diff --git a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected index ca287fc2d63209..fffe299f3d16f6 100644 --- a/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected +++ b/llvm/test/tools/UpdateTestChecks/update_mc_test_checks/Inputs/amdgpu_asm_err.s.expected @@ -1,5 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py -// RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECK %s +// RUN: not llvm-mc -triple=amdgcn -show-encoding %s 2>&1 | FileCheck --check-prefixes=CHECKA %s +// RUN: not llvm-mc -triple=amdgcn %s 2>&1 | FileCheck --check-prefixes=CHECKB %s v_bfrev_b32 v5, v299 -// CHECK: :[[@LINE-1]]:17: error: register index is out of range +// CHECKA: :[[@LINE-1]]:17: error: register index is out of range +// CHECKB: :[[@LINE-2]]:17: error: register index is out of range + +v_bfrev_b32 v5, v1 +// CHECKA: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] +// CHECKB: v_bfrev_b32_e32 v5, v1 diff --git a/llvm/test/tools/llvm-objdump/X86/disassemble-color.s b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s new file mode 100644 index 00000000000000..4e1d82562fb546 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/disassemble-color.s @@ -0,0 +1,21 @@ +# UNSUPPORTED: system-windows +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on %t | FileCheck %s --check-prefix=ATT +# RUN: llvm-objdump -d --no-show-raw-insn --disassembler-color=on -M intel %t | FileCheck %s --check-prefix=INTEL + +# ATT: <.text>: +# ATT-NEXT: leaq (%rdx,%rax,4), %rbx +# ATT-NEXT: movq (,%rax), %rbx +# ATT-NEXT: leaq 0x3(%rdx,%rax), %rbx +# ATT-NEXT: movq $0x3, %rax + +# INTEL: <.text>: +# INTEL-NEXT: lea rbx, [rdx + 4*rax] +# INTEL-NEXT: mov rbx, qword ptr [1*rax] +# INTEL-NEXT: lea rbx, [rdx + rax + 0x3] +# INTEL-NEXT: mov rax, 0x3 + +leaq (%rdx,%rax,4), %rbx +movq (,%rax), %rbx +leaq 3(%rdx,%rax), %rbx +movq $3, %rax diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 59f0f1f1fae899..f7023aa966adf6 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -342,6 +342,15 @@ cl::opt MemProfFullSchema( "memprof-full-schema", cl::Hidden, cl::sub(MergeSubcommand), cl::desc("Use the full schema for serialization"), cl::init(false)); +static cl::opt + MemprofGenerateRandomHotness("memprof-random-hotness", cl::init(false), + cl::Hidden, cl::sub(MergeSubcommand), + cl::desc("Generate random hotness values")); +static cl::opt MemprofGenerateRandomHotnessSeed( + "memprof-random-hotness-seed", cl::init(0), cl::Hidden, + cl::sub(MergeSubcommand), + cl::desc("Random hotness seed to use (0 to generate new seed)")); + // Options specific to overlap subcommand. cl::opt BaseFilename(cl::Positional, cl::Required, cl::desc(""), @@ -641,7 +650,8 @@ struct WriterContext { SmallSet &WriterErrorCodes, uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0) : Writer(IsSparse, ReservoirSize, MaxTraceLength, DoWritePrevVersion, - MemProfVersionRequested, MemProfFullSchema), + MemProfVersionRequested, MemProfFullSchema, + MemprofGenerateRandomHotness, MemprofGenerateRandomHotnessSeed), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {} }; diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index 3df72ec8115b6a..ffedb2c74220f0 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -6,6 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "../lib/Target/AArch64/AArch64ISelLowering.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/AsmParser/Parser.h" @@ -167,6 +168,18 @@ TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_EXTRACT_SUBVECTOR) { EXPECT_EQ(DAG->ComputeNumSignBits(Op, DemandedElts), 7u); } +TEST_F(AArch64SelectionDAGTest, ComputeNumSignBits_VASHR) { + SDLoc Loc; + auto VecVT = MVT::v8i8; + auto Shift = DAG->getConstant(4, Loc, MVT::i32); + auto Vec0 = DAG->getConstant(1, Loc, VecVT); + auto Op1 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, Vec0, Shift); + EXPECT_EQ(DAG->ComputeNumSignBits(Op1), 8u); + auto VecA = DAG->getConstant(0xaa, Loc, VecVT); + auto Op2 = DAG->getNode(AArch64ISD::VASHR, Loc, VecVT, VecA, Shift); + EXPECT_EQ(DAG->ComputeNumSignBits(Op2), 5u); +} + TEST_F(AArch64SelectionDAGTest, SimplifyDemandedVectorElts_EXTRACT_SUBVECTOR) { TargetLowering TL(*TM); diff --git a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp index 63cf3a397cb30d..bc996711f7ec50 100644 --- a/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp @@ -65,6 +65,36 @@ TEST_F(ObjectLinkingLayerTest, AddLinkGraph) { EXPECT_THAT_EXPECTED(ES.lookup(&JD, "_X"), Succeeded()); } +TEST_F(ObjectLinkingLayerTest, ResourceTracker) { + // This test transfers allocations to previously unknown ResourceTrackers, + // while increasing the number of trackers in the ObjectLinkingLayer, which + // may invalidate some iterators internally. + std::vector Trackers; + for (unsigned I = 0; I < 64; I++) { + auto G = std::make_unique("foo", Triple("x86_64-apple-darwin"), + 8, llvm::endianness::little, + x86_64::getEdgeKindName); + + auto &Sec1 = G->createSection("__data", MemProt::Read | MemProt::Write); + auto &B1 = G->createContentBlock(Sec1, BlockContent, + orc::ExecutorAddr(0x1000), 8, 0); + llvm::SmallString<0> SymbolName; + SymbolName += "_X"; + SymbolName += std::to_string(I); + G->addDefinedSymbol(B1, 4, SymbolName, 4, Linkage::Strong, Scope::Default, + false, false); + + auto RT1 = JD.createResourceTracker(); + EXPECT_THAT_ERROR(ObjLinkingLayer.add(RT1, std::move(G)), Succeeded()); + EXPECT_THAT_EXPECTED(ES.lookup(&JD, SymbolName), Succeeded()); + + auto RT2 = JD.createResourceTracker(); + RT1->transferTo(*RT2); + + Trackers.push_back(RT2); + } +} + TEST_F(ObjectLinkingLayerTest, ClaimLateDefinedWeakSymbols) { // Check that claiming weak symbols works as expected. // diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 91cd35a10e9b92..462578a34da837 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -385,5 +385,35 @@ TEST(VerifierTest, AtomicRMW) { << Error; } +TEST(VerifierTest, GetElementPtrInst) { + LLVMContext C; + Module M("M", C); + FunctionType *FTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, "foo", M); + BasicBlock *Entry = BasicBlock::Create(C, "entry", F); + ReturnInst *RI = ReturnInst::Create(C, Entry); + + FixedVectorType *V2P1Ty = FixedVectorType::get(PointerType::get(C, 1), 2); + FixedVectorType *V2P2Ty = FixedVectorType::get(PointerType::get(C, 2), 2); + + Instruction *GEPVec = GetElementPtrInst::Create( + Type::getInt8Ty(C), ConstantAggregateZero::get(V2P1Ty), + {ConstantVector::getSplat(ElementCount::getFixed(2), + ConstantInt::get(Type::getInt64Ty(C), 0))}, + Entry); + + GEPVec->insertBefore(RI); + + // Break the address space of the source value + GEPVec->getOperandUse(0).set(ConstantAggregateZero::get(V2P2Ty)); + + std::string Error; + raw_string_ostream ErrorOS(Error); + EXPECT_TRUE(verifyFunction(*F, &ErrorOS)); + EXPECT_TRUE( + StringRef(Error).starts_with("GEP address space doesn't match type")) + << Error; +} + } // end anonymous namespace } // end namespace llvm diff --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt index e0efb967b5941d..ca8f48bc393efd 100644 --- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt +++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt @@ -11,6 +11,7 @@ set(LLVM_LINK_COMPONENTS CodeGen CodeGenTypes Core + GlobalISel MC Support TargetParser @@ -18,6 +19,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_target_unittest(AMDGPUTests AMDGPUUnitTests.cpp + CSETest.cpp DwarfRegMappings.cpp ExecMayBeModifiedBeforeAnyUse.cpp PALMetadata.cpp diff --git a/llvm/unittests/Target/AMDGPU/CSETest.cpp b/llvm/unittests/Target/AMDGPU/CSETest.cpp new file mode 100644 index 00000000000000..3de5b88599649a --- /dev/null +++ b/llvm/unittests/Target/AMDGPU/CSETest.cpp @@ -0,0 +1,74 @@ +//===- llvm/unittests/Target/AMDGPU/CSETest.cpp ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetMachine.h" +#include "AMDGPUUnitTests.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "gtest/gtest.h" + +using namespace llvm; + +TEST(AMDGPU, TestCSEForRegisterClassOrBankAndLLT) { + auto TM = createAMDGPUTargetMachine("amdgcn-amd-", "gfx1100", ""); + if (!TM) + GTEST_SKIP(); + + GCNSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetFeatureString()), *TM); + + LLVMContext Ctx; + Module Mod("Module", Ctx); + Mod.setDataLayout(TM->createDataLayout()); + + auto *Type = FunctionType::get(Type::getVoidTy(Ctx), false); + auto *F = Function::Create(Type, GlobalValue::ExternalLinkage, "Test", &Mod); + + MachineModuleInfo MMI(TM.get()); + auto MF = + std::make_unique(*F, *TM, ST, MMI.getContext(), 42); + auto *BB = MF->CreateMachineBasicBlock(); + MF->push_back(BB); + + MachineIRBuilder B(*MF); + B.setMBB(*BB); + + LLT S32{LLT::scalar(32)}; + Register R0 = B.buildCopy(S32, Register(AMDGPU::SGPR0)).getReg(0); + Register R1 = B.buildCopy(S32, Register(AMDGPU::SGPR1)).getReg(0); + + GISelCSEInfo CSEInfo; + CSEInfo.setCSEConfig(std::make_unique()); + CSEInfo.analyze(*MF); + B.setCSEInfo(&CSEInfo); + CSEMIRBuilder CSEB(B.getState()); + CSEB.setInsertPt(B.getMBB(), B.getInsertPt()); + + const RegisterBankInfo &RBI = *MF->getSubtarget().getRegBankInfo(); + + const TargetRegisterClass *SgprRC = &AMDGPU::SReg_32RegClass; + const RegisterBank *SgprRB = &RBI.getRegBank(AMDGPU::SGPRRegBankID); + MachineRegisterInfo::VRegAttrs SgprRCS32 = {SgprRC, S32}; + MachineRegisterInfo::VRegAttrs SgprRBS32 = {SgprRB, S32}; + + auto Add = CSEB.buildAdd(S32, R0, R1); + auto AddRC = CSEB.buildInstr(AMDGPU::G_ADD, {SgprRCS32}, {R0, R1}); + auto AddRB = CSEB.buildInstr(AMDGPU::G_ADD, {{SgprRB, S32}}, {R0, R1}); + + EXPECT_NE(Add, AddRC); + EXPECT_NE(Add, AddRB); + EXPECT_NE(AddRC, AddRB); + + auto Add_CSE = CSEB.buildAdd(S32, R0, R1); + auto AddRC_CSE = CSEB.buildInstr(AMDGPU::G_ADD, {{SgprRC, S32}}, {R0, R1}); + auto AddRB_CSE = CSEB.buildInstr(AMDGPU::G_ADD, {SgprRBS32}, {R0, R1}); + + EXPECT_EQ(Add, Add_CSE); + EXPECT_EQ(AddRC, AddRC_CSE); + EXPECT_EQ(AddRB, AddRB_CSE); +} diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp index aaa8e96de6d171..50b78f6f48afdf 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/LegalityTest.cpp @@ -29,7 +29,7 @@ struct LegalityTest : public testing::Test { TEST_F(LegalityTest, Legality) { parseIR(C, R"IR( -define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1) { +define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float %farg0, float %farg1, i64 %v0, i64 %v1) { %gep0 = getelementptr float, ptr %ptr, i32 0 %gep1 = getelementptr float, ptr %ptr, i32 1 %gep3 = getelementptr float, ptr %ptr, i32 3 @@ -42,6 +42,8 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float store i8 %arg, ptr %gep1 %fadd0 = fadd float %farg0, %farg0 %fadd1 = fadd fast float %farg1, %farg1 + %trunc0 = trunc nuw nsw i64 %v0 to i8 + %trunc1 = trunc nsw i64 %v1 to i8 ret void } )IR"); @@ -62,6 +64,8 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float auto *StI8 = cast(&*It++); auto *FAdd0 = cast(&*It++); auto *FAdd1 = cast(&*It++); + auto *Trunc0 = cast(&*It++); + auto *Trunc1 = cast(&*It++); sandboxir::LegalityAnalysis Legality; const auto &Result = Legality.canVectorize({St0, St1}); @@ -98,6 +102,13 @@ define void @foo(ptr %ptr, <2 x float> %vec2, <3 x float> %vec3, i8 %arg, float EXPECT_EQ(cast(Result).getReason(), sandboxir::ResultReason::DiffMathFlags); } + { + // Check DiffWrapFlags + const auto &Result = Legality.canVectorize({Trunc0, Trunc1}); + EXPECT_TRUE(isa(Result)); + EXPECT_EQ(cast(Result).getReason(), + sandboxir::ResultReason::DiffWrapFlags); + } } #ifndef NDEBUG @@ -124,5 +135,8 @@ TEST_F(LegalityTest, LegalityResultDump) { EXPECT_TRUE(Matches(Legality.createLegalityResult( sandboxir::ResultReason::DiffMathFlags), "Pack Reason: DiffMathFlags")); + EXPECT_TRUE(Matches(Legality.createLegalityResult( + sandboxir::ResultReason::DiffWrapFlags), + "Pack Reason: DiffWrapFlags")); } #endif // NDEBUG diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index d2228c902a56b4..f17c62dd1fd9d4 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -813,8 +813,8 @@ void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) const { void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out, const TypeSetByHwMode::SetType &Legal) const { - if (Out.count(MVT::iPTRAny)) { - Out.erase(MVT::iPTRAny); + if (Out.count(MVT::pAny)) { + Out.erase(MVT::pAny); Out.insert(MVT::iPTR); } else if (Out.count(MVT::iAny)) { Out.erase(MVT::iAny); @@ -2461,7 +2461,8 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) { ValueTypeByHwMode VVT = TP.getInfer().getConcrete(Types[0], false); for (auto &P : VVT) { MVT::SimpleValueType VT = P.second.SimpleTy; - if (VT == MVT::iPTR || VT == MVT::iPTRAny) + // Can only check for types of a known size + if (VT == MVT::iPTR) continue; unsigned Size = MVT(VT).getFixedSizeInBits(); // Make sure that the value is representable for this type. diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.cpp b/llvm/utils/TableGen/Common/CodeGenTarget.cpp index b358518c4290b0..4e75db689a0b57 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/Common/CodeGenTarget.cpp @@ -47,19 +47,6 @@ MVT::SimpleValueType llvm::getValueType(const Record *Rec) { return (MVT::SimpleValueType)Rec->getValueAsInt("Value"); } -StringRef llvm::getName(MVT::SimpleValueType T) { - switch (T) { - case MVT::Other: - return "UNKNOWN"; - case MVT::iPTR: - return "TLI.getPointerTy()"; - case MVT::iPTRAny: - return "TLI.getPointerTy()"; - default: - return getEnumName(T); - } -} - StringRef llvm::getEnumName(MVT::SimpleValueType T) { // clang-format off switch (T) { diff --git a/llvm/utils/TableGen/Common/CodeGenTarget.h b/llvm/utils/TableGen/Common/CodeGenTarget.h index c7b44f7028eb5b..8bcb2f677a00b0 100644 --- a/llvm/utils/TableGen/Common/CodeGenTarget.h +++ b/llvm/utils/TableGen/Common/CodeGenTarget.h @@ -46,7 +46,6 @@ class CodeGenSubRegIndex; /// record corresponds to. MVT::SimpleValueType getValueType(const Record *Rec); -StringRef getName(MVT::SimpleValueType T); StringRef getEnumName(MVT::SimpleValueType T); /// getQualifiedName - Return the name of the specified record, with a diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index 17198c85f06009..2052222cae5e5f 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -718,19 +718,20 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { const PredMap &PM = RI.second; OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_" - << getLegalCName(std::string(getName(RetVT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_" + << getLegalCName(std::string(getEnumName(RetVT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintParameters(OS); OS << ") {\n"; - emitInstructionCode(OS, Operands, PM, std::string(getName(RetVT))); + emitInstructionCode(OS, Operands, PM, + std::string(getEnumName(RetVT))); } // Emit one function for the type that demultiplexes on return type. OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) @@ -739,10 +740,10 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { OS << ") {\nswitch (RetVT.SimpleTy) {\n"; for (const auto &RI : RM) { MVT::SimpleValueType RetVT = RI.first; - OS << " case " << getName(RetVT) << ": return fastEmit_" + OS << " case " << getEnumName(RetVT) << ": return fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_" - << getLegalCName(std::string(getName(RetVT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_" + << getLegalCName(std::string(getEnumName(RetVT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "("; Operands.PrintArguments(OS); @@ -753,7 +754,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { } else { // Non-variadic return type. OS << "unsigned fastEmit_" << getLegalCName(Opcode) << "_" - << getLegalCName(std::string(getName(VT))) << "_"; + << getLegalCName(std::string(getEnumName(VT))) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); OS << "(MVT RetVT"; if (!Operands.empty()) @@ -761,7 +762,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { Operands.PrintParameters(OS); OS << ") {\n"; - OS << " if (RetVT.SimpleTy != " << getName(RM.begin()->first) + OS << " if (RetVT.SimpleTy != " << getEnumName(RM.begin()->first) << ")\n return 0;\n"; const PredMap &PM = RM.begin()->second; @@ -781,7 +782,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { OS << " switch (VT.SimpleTy) {\n"; for (const auto &TI : TM) { MVT::SimpleValueType VT = TI.first; - std::string TypeName = std::string(getName(VT)); + std::string TypeName = std::string(getEnumName(VT)); OS << " case " << TypeName << ": return fastEmit_" << getLegalCName(Opcode) << "_" << getLegalCName(TypeName) << "_"; Operands.PrintManglingSuffix(OS, ImmediatePredicates); diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py index cdfa8978566fb4..b108a21dbc52b8 100644 --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -2470,6 +2470,7 @@ def get_autogennote_suffix(parser, args): "verbose", "force_update", "reset_variable_names", + "llvm_mc_binary", ): continue value = getattr(args, action.dest) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8121e34dcf6eff..776f1d32c5f520 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -995,6 +995,7 @@ if (current_toolchain == default_toolchain) { "coroutine", "csetjmp", "csignal", + "cstdalign", "cstdarg", "cstdbool", "cstddef", diff --git a/llvm/utils/update_mc_test_checks.py b/llvm/utils/update_mc_test_checks.py index 55ed6c82d4877e..c8a40b37088ae4 100755 --- a/llvm/utils/update_mc_test_checks.py +++ b/llvm/utils/update_mc_test_checks.py @@ -16,7 +16,6 @@ mc_LIKE_TOOLS = [ "llvm-mc", - "not llvm-mc", ] ERROR_RE = re.compile(r":\d+: (warning|error): .*") ERROR_CHECK_RE = re.compile(r"# COM: .*") @@ -24,7 +23,7 @@ COMMENT = {"asm": "//", "dasm": "#"} -def invoke_tool(exe, cmd_args, testline, verbose=False): +def invoke_tool(exe, check_rc, cmd_args, testline, verbose=False): if isinstance(cmd_args, list): args = [applySubstitutions(a, substitutions) for a in cmd_args] else: @@ -33,7 +32,15 @@ def invoke_tool(exe, cmd_args, testline, verbose=False): cmd = 'echo "' + testline + '" | ' + exe + " " + args if verbose: print("Command: ", cmd) - out = subprocess.check_output(cmd, shell=True) + + out = subprocess.run( + cmd, + shell=True, + check=check_rc, + stdout=subprocess.PIPE, + stderr=subprocess.DEVNULL, + ).stdout + # Fix line endings to unix CR style. return out.decode().replace("\r\n", "\n") @@ -102,8 +109,16 @@ def getStdCheckLine(prefix, output, mc_mode): return o -def getErrCheckLine(prefix, output, mc_mode): - return COMMENT[mc_mode] + " " + prefix + ": " + ":[[@LINE-1]]" + output + "\n" +def getErrCheckLine(prefix, output, mc_mode, line_offset=1): + return ( + COMMENT[mc_mode] + + " " + + prefix + + ": " + + ":[[@LINE-{}]]".format(line_offset) + + output + + "\n" + ) def main(): @@ -174,11 +189,19 @@ def main(): assert len(commands) >= 2 mc_cmd = " | ".join(commands[:-1]) filecheck_cmd = commands[-1] - mc_tool = mc_cmd.split(" ")[0] # special handling for negating exit status - if mc_tool == "not": - mc_tool = mc_tool + " " + mc_cmd.split(" ")[1] + # if not is used in runline, disable rc check, since + # the command might or might not + # return non-zero code on a single line run + check_rc = True + mc_cmd_args = mc_cmd.strip().split() + if mc_cmd_args[0] == "not": + check_rc = False + mc_tool = mc_cmd_args[1] + mc_cmd = mc_cmd[len(mc_cmd_args[0]) :].strip() + else: + mc_tool = mc_cmd_args[0] triple_in_cmd = None m = common.TRIPLE_ARG_RE.search(mc_cmd) @@ -211,6 +234,7 @@ def main(): ( check_prefixes, mc_tool, + check_rc, mc_cmd_args, triple_in_cmd, march_in_cmd, @@ -231,6 +255,7 @@ def main(): for ( prefixes, mc_tool, + check_rc, mc_args, triple_in_cmd, march_in_cmd, @@ -249,6 +274,7 @@ def main(): # get output for each testline out = invoke_tool( ti.args.llvm_mc_binary or mc_tool, + check_rc, mc_args, line, verbose=ti.args.verbose, @@ -305,6 +331,9 @@ def main(): # each run_id can only be used once gen_prefix = "" used_runid = set() + + # line number diff between generated prefix and testline + line_offset = 1 for prefix, tup in p_dict_sorted.items(): o, run_ids = tup @@ -321,9 +350,13 @@ def main(): used_prefixes.add(prefix) if hasErr(o): - gen_prefix += getErrCheckLine(prefix, o, mc_mode) + newline = getErrCheckLine(prefix, o, mc_mode, line_offset) else: - gen_prefix += getStdCheckLine(prefix, o, mc_mode) + newline = getStdCheckLine(prefix, o, mc_mode) + + if newline: + gen_prefix += newline + line_offset += 1 generated_prefixes[input_line] = gen_prefix.rstrip("\n") diff --git a/mlir/docs/Rationale/SideEffectsAndSpeculation.md b/mlir/docs/Rationale/SideEffectsAndSpeculation.md index 8b08b757531bef..4d9021a356dfea 100644 --- a/mlir/docs/Rationale/SideEffectsAndSpeculation.md +++ b/mlir/docs/Rationale/SideEffectsAndSpeculation.md @@ -79,9 +79,9 @@ When adding a new op, ask: 1. Does it read from or write to the heap or stack? It should probably implement `MemoryEffectsOpInterface`. -1. Does these side effects ordered? It should probably set the stage of - side effects to make analysis more accurate. -1. Does These side effects act on every single value of resource? It probably +1. Are these side effects ordered? The op should probably set the stage of + side effects to make analyses more accurate. +1. Do these side effects act on every single value of a resource? It probably should set the FullEffect on effect. 1. Does it have side effects that must be preserved, like a volatile store or a syscall? It should probably implement `MemoryEffectsOpInterface` and model @@ -106,9 +106,9 @@ add side effect correctly. ### SIMD compute operation -If we have a SIMD backend dialect with a "simd.abs" operation, which reads all +Consider a SIMD backend dialect with a "simd.abs" operation which reads all values from the source memref, calculates their absolute values, and writes them -to the target memref. +to the target memref: ```mlir func.func @abs(%source : memref<10xf32>, %target : memref<10xf32>) { @@ -139,10 +139,10 @@ A typical approach is as follows: } ``` -In the above example, we attach the side effect [MemReadAt<0, FullEffect>] to +In the above example, we attach the side effect `[MemReadAt<0, FullEffect>]` to the source, indicating that the abs operation reads each individual value from the source during stage 0. Likewise, we attach the side effect -[MemWriteAt<1, FullEffect>] to the target, indicating that the abs operation +`[MemWriteAt<1, FullEffect>]` to the target, indicating that the abs operation writes to each individual value within the target during stage 1 (after reading from the source). @@ -174,7 +174,7 @@ A typical approach is as follows: } ``` -In the above example, we attach the side effect [MemReadAt<0, PartialEffect>] to +In the above example, we attach the side effect `[MemReadAt<0, PartialEffect>]` to the source, indicating that the load operation reads parts of values from the memref during stage 0. Since side effects typically occur at stage 0 and are -partial by default, we can abbreviate it as "[MemRead]". +partial by default, we can abbreviate it as `[MemRead]`. diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index aceb9d059b95f3..4866e31b19d5de 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -60,7 +60,8 @@ struct AliasingValue { bool isDefinite; }; -template class AliasList { +template +class AliasList { public: /// Create an empty list of aliases. AliasList() = default; @@ -259,7 +260,7 @@ struct BufferizationOptions { /// Initializer function for analysis state. using AnalysisStateInitFn = std::function; /// Tensor -> MemRef type converter. - /// Parameters: Value, memory space, func op, bufferization options + /// Parameters: tensor type, memory space, func op, bufferization options using FunctionArgTypeConverterFn = std::function; @@ -344,9 +345,9 @@ struct BufferizationOptions { void setFunctionBoundaryTypeConversion(LayoutMapOption layoutMapOption); /// Type converter from tensors to memrefs. This type converter is used to - /// determine bufferized function argument types. By default, a type - /// converter that returns a memref type with a fully dynamic layout map is - /// used. + /// determine bufferized function argument and result types. By default, a + /// type converter that returns a memref type with a fully dynamic layout map + /// is used. /// /// If `bufferizeFunctionBoundaries` is not set, this function isn't used. FunctionArgTypeConverterFn functionArgTypeConverterFn = nullptr; diff --git a/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h b/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h index 1d311b37b37a4f..1be406bf3adf92 100644 --- a/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h +++ b/mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h @@ -23,70 +23,10 @@ namespace mlir { -/// This class provides a hook that expands one Value into multiple Value's, -/// with a TypeConverter-inspired callback registration mechanism. -/// -/// For folks that are familiar with the dialect conversion framework / -/// TypeConverter, this is effectively the inverse of a source/argument -/// materialization. A target materialization is not what we want here because -/// it always produces a single Value, but in this case the whole point is to -/// decompose a Value into multiple Value's. -/// -/// The reason we need this inverse is easily understood by looking at what we -/// need to do for decomposing types for a return op. When converting a return -/// op, the dialect conversion framework will give the list of converted -/// operands, and will ensure that each converted operand, even if it expanded -/// into multiple types, is materialized as a single result. We then need to -/// undo that materialization to a single result, which we do with the -/// decomposeValue hooks registered on this object. -/// -/// TODO: Eventually, the type conversion infra should have this hook built-in. -/// See -/// https://llvm.discourse.group/t/extending-type-conversion-infrastructure/779/2 -class ValueDecomposer { -public: - /// This method tries to decompose a value of a certain type using provided - /// decompose callback functions. If it is unable to do so, the original value - /// is returned. - void decomposeValue(OpBuilder &, Location, Type, Value, - SmallVectorImpl &); - - /// This method registers a callback function that will be called to decompose - /// a value of a certain type into 0, 1, or multiple values. - template >::template arg_t<2>> - void addDecomposeValueConversion(FnT &&callback) { - decomposeValueConversions.emplace_back( - wrapDecomposeValueConversionCallback(std::forward(callback))); - } - -private: - using DecomposeValueConversionCallFn = - std::function( - OpBuilder &, Location, Type, Value, SmallVectorImpl &)>; - - /// Generate a wrapper for the given decompose value conversion callback. - template - DecomposeValueConversionCallFn - wrapDecomposeValueConversionCallback(FnT &&callback) { - return - [callback = std::forward(callback)]( - OpBuilder &builder, Location loc, Type type, Value value, - SmallVectorImpl &newValues) -> std::optional { - if (T derivedType = dyn_cast(type)) - return callback(builder, loc, derivedType, value, newValues); - return std::nullopt; - }; - } - - SmallVector decomposeValueConversions; -}; - /// Populates the patterns needed to drive the conversion process for -/// decomposing call graph types with the given `ValueDecomposer`. +/// decomposing call graph types with the given `TypeConverter`. void populateDecomposeCallGraphTypesPatterns(MLIRContext *context, const TypeConverter &typeConverter, - ValueDecomposer &decomposer, RewritePatternSet &patterns); } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index bfc609bd708164..c2fee8ea55c960 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -30,6 +30,7 @@ class LinalgStructuredBase_Op props> SingleBlockImplicitTerminator<"YieldOp">, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, + RecursiveMemoryEffects, DestinationStyleOpInterface, LinalgStructuredInterface, ReifyRankedShapedTypeOpInterface], props)> { diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h index ca3326dbbef519..a761a77a407e87 100644 --- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h +++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h @@ -32,7 +32,8 @@ namespace memref { bool isStaticShapeAndContiguousRowMajor(MemRefType type); /// For a `memref` with `offset`, `sizes` and `strides`, returns the -/// offset and size to use for the linearized `memref`. +/// offset, size, and potentially the size padded at the front to use for the +/// linearized `memref`. /// - If the linearization is done for emulating load/stores of /// element type with bitwidth `srcBits` using element type with /// bitwidth `dstBits`, the linearized offset and size are @@ -42,9 +43,14 @@ bool isStaticShapeAndContiguousRowMajor(MemRefType type); /// index to use in the linearized `memref`. The linearized index /// is also scaled down by `dstBits`/`srcBits`. If `indices` is not provided /// 0, is returned for the linearized index. +/// - If the size of the load/store is smaller than the linearized memref +/// load/store, the memory region emulated is larger than the actual memory +/// region needed. `intraDataOffset` returns the element offset of the data +/// relevant at the beginning. struct LinearizedMemRefInfo { OpFoldResult linearizedOffset; OpFoldResult linearizedSize; + OpFoldResult intraDataOffset; }; std::pair getLinearizedMemRefOffsetAndSize( OpBuilder &builder, Location loc, int srcBits, int dstBits, diff --git a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp index 11bfde890bce87..7f433254e95ae2 100644 --- a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp +++ b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitCPass.cpp @@ -40,6 +40,19 @@ struct ConvertMemRefToEmitCPass populateMemRefToEmitCTypeConversion(converter); + auto materializeAsUnrealizedCast = [](OpBuilder &builder, Type resultType, + ValueRange inputs, + Location loc) -> Value { + if (inputs.size() != 1) + return Value(); + + return builder.create(loc, resultType, inputs) + .getResult(0); + }; + + converter.addSourceMaterialization(materializeAsUnrealizedCast); + converter.addTargetMaterialization(materializeAsUnrealizedCast); + RewritePatternSet patterns(&getContext()); populateMemRefToEmitCConversionPatterns(patterns, converter); diff --git a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp index c8cfcc3d945bec..46c7bfbf3ffcc2 100644 --- a/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp +++ b/mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp @@ -37,40 +37,38 @@ std::pair getTileSizes(ConversionPatternRewriter &rewriter, rewriter.create(loc, llvmInt16Type, nattr)); } -/// Verifies if the stride matches proper tile access. -LogicalResult verifyStride(MemRefType mType) { - if (mType.getRank() < 2) - return failure(); - int64_t last = mType.getRank() - 1; - int64_t offset; - SmallVector strides; - if (failed(getStridesAndOffset(mType, strides, offset)) || strides[last] != 1) - return failure(); - return success(); -} - /// Maps the 2-dim memref shape to the 64-bit stride. Note that the buffer /// shape may "envelop" the actual tile shape, and may be dynamically sized. -Value getStride(ConversionPatternRewriter &rewriter, - const LLVMTypeConverter &typeConverter, MemRefType mType, - Value base, Location loc) { - assert(mType.getRank() >= 2); - int64_t last = mType.getRank() - 1; +/// Returns failure if proper stride couldn't be found. +FailureOr getStride(ConversionPatternRewriter &rewriter, + const LLVMTypeConverter &typeConverter, + MemRefType mType, Value base, Location loc) { + if (mType.getRank() < 2) + return failure(); + int64_t preLast = mType.getRank() - 2; Type llvmInt64Type = IntegerType::get(&typeConverter.getContext(), 64); unsigned width = mType.getElementType().getIntOrFloatBitWidth(); assert(llvm::isPowerOf2_64(width) && width >= 8); unsigned bytes = width >> 3; - if (mType.isDynamicDim(last)) { - // Dynamic size needs code to compute the stride at runtime. + int64_t offset; + SmallVector strides; + if (failed(getStridesAndOffset(mType, strides, offset)) || + strides.back() != 1) + return failure(); + if (strides[preLast] == ShapedType::kDynamic) { + // Dynamic stride needs code to compute the stride at runtime. MemRefDescriptor memrefDescriptor(base); auto attr = rewriter.getI64IntegerAttr(bytes); Value scale = rewriter.create(loc, llvmInt64Type, attr); - return rewriter.create( - loc, llvmInt64Type, scale, memrefDescriptor.size(rewriter, loc, last)); + return rewriter + .create(loc, llvmInt64Type, scale, + memrefDescriptor.stride(rewriter, loc, preLast)) + .getResult(); } - // Use direct constant for static size. - auto attr = rewriter.getI64IntegerAttr(mType.getDimSize(last) * bytes); - return rewriter.create(loc, llvmInt64Type, attr); + // Use direct constant for static stride. + auto attr = rewriter.getI64IntegerAttr(strides[preLast] * bytes); + return rewriter.create(loc, llvmInt64Type, attr) + .getResult(); } struct TileZeroConversion : public ConvertOpToLLVMPattern { @@ -102,16 +100,16 @@ struct TileLoadConversion : public ConvertOpToLLVMPattern { std::pair tsz = getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc()); // Determine stride. - if (failed(verifyStride(mType))) + auto stride = getStride(rewriter, *getTypeConverter(), mType, + adaptor.getBase(), op.getLoc()); + if (failed(stride)) return failure(); - Value stride = getStride(rewriter, *getTypeConverter(), mType, - adaptor.getBase(), op.getLoc()); // Replace operation with intrinsic. Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(), adaptor.getIndices(), rewriter); Type resType = typeConverter->convertType(vType); rewriter.replaceOpWithNewOp( - op, resType, tsz.first, tsz.second, ptr, stride); + op, resType, tsz.first, tsz.second, ptr, stride.value()); return success(); } }; @@ -128,15 +126,15 @@ struct TileStoreConversion : public ConvertOpToLLVMPattern { std::pair tsz = getTileSizes(rewriter, *getTypeConverter(), vType, op.getLoc()); // Determine stride. - if (failed(verifyStride(mType))) + auto stride = getStride(rewriter, *getTypeConverter(), mType, + adaptor.getBase(), op.getLoc()); + if (failed(stride)) return failure(); - Value stride = getStride(rewriter, *getTypeConverter(), mType, - adaptor.getBase(), op.getLoc()); // Replace operation with intrinsic. Value ptr = getStridedElementPtr(op.getLoc(), mType, adaptor.getBase(), adaptor.getIndices(), rewriter); rewriter.replaceOpWithNewOp( - op, tsz.first, tsz.second, ptr, stride, adaptor.getVal()); + op, tsz.first, tsz.second, ptr, stride.value(), adaptor.getVal()); return success(); } }; diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 9fbe574ec392dc..6e91d3b89a7c79 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -82,7 +82,8 @@ getBufferizedFunctionArgType(FuncOp funcOp, int64_t index, /// Return the FuncOp called by `callOp`. static FuncOp getCalledFunction(CallOpInterface callOp) { - SymbolRefAttr sym = llvm::dyn_cast_if_present(callOp.getCallableForCallee()); + SymbolRefAttr sym = + llvm::dyn_cast_if_present(callOp.getCallableForCallee()); if (!sym) return nullptr; return dyn_cast_or_null( @@ -392,11 +393,11 @@ struct FuncOpInterface auto funcOp = cast(op); FunctionType funcType = funcOp.getFunctionType(); - // Construct the bufferized function type. + // Compute the argument types. SmallVector argTypes; for (const auto &it : llvm::enumerate(funcType.getInputs())) { Type argType = it.value(); - if (dyn_cast(argType)) { + if (isa(argType)) { argTypes.push_back( getBufferizedFunctionArgType(funcOp, it.index(), options)); continue; @@ -404,24 +405,33 @@ struct FuncOpInterface argTypes.push_back(argType); } - // Bodiless functions are assumed opaque and we cannot know the - // bufferization contract they want to enforce. As a consequence, only - // support functions that don't return any tensors atm. - if (funcOp.isExternal()) { - SmallVector retTypes; - for (Type resultType : funcType.getResults()) { - if (isa(resultType)) - return funcOp->emitError() << "cannot bufferize bodiless function " - << "that returns a tensor"; + // Compute the result types. + SmallVector retTypes; + for (Type resultType : funcType.getResults()) { + if (auto tensorType = dyn_cast(resultType)) { + BaseMemRefType resultType = options.functionArgTypeConverterFn( + tensorType, *options.defaultMemorySpaceFn(tensorType), funcOp, + options); retTypes.push_back(resultType); + continue; } - funcOp.setType(FunctionType::get(op->getContext(), argTypes, retTypes)); + retTypes.push_back(resultType); + } + + // Compute the new function type. + auto newFuncType = FunctionType::get(op->getContext(), argTypes, retTypes); + + // If the function has no body, set the new function type and we are done. + if (funcOp.isExternal()) { + funcOp.setType(newFuncType); return success(); } // TODO: Support functions with multiple returns. func::ReturnOp returnOp = getAssumedUniqueReturnOp(funcOp); assert(returnOp && "expected func with single return op"); + assert(returnOp->getNumOperands() == retTypes.size() && + "incorrect number of return values"); Location loc = returnOp.getLoc(); // 1. Bufferize every block. @@ -430,10 +440,10 @@ struct FuncOpInterface options))) return failure(); - // 2. For each result, keep track of which inplace argument it reuses. + // 2. Bufferize all operands of the return op. SmallVector returnValues; - for (OpOperand &returnOperand : returnOp->getOpOperands()) { - Value returnVal = returnOperand.get(); + for (auto [returnVal, bufferizedType] : + llvm::zip_equal(returnOp->getOperands(), retTypes)) { auto tensorType = dyn_cast(returnVal.getType()); rewriter.setInsertionPoint(returnOp); @@ -443,23 +453,17 @@ struct FuncOpInterface continue; } - // Note: If `inferFunctionResultLayout = true`, cast are later folded + // Note: If `inferFunctionResultLayout = true`, casts are later folded // away. - BaseMemRefType resultType = options.functionArgTypeConverterFn( - tensorType, *options.defaultMemorySpaceFn(tensorType), funcOp, - options); Value toMemrefOp = rewriter.create( - loc, resultType, returnVal); + loc, bufferizedType, returnVal); returnValues.push_back(toMemrefOp); } - // 3. Rewrite the terminator without the in-place bufferizable values. returnOp.getOperandsMutable().assign(returnValues); - // 4. Rewrite the FuncOp type to buffer form. - funcOp.setType(FunctionType::get(op->getContext(), argTypes, - ValueRange(returnValues).getTypes())); - + // 3. Set the new function type. + funcOp.setType(newFuncType); return success(); } diff --git a/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp b/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp index 357f993710a26a..de4aba2ed327db 100644 --- a/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp +++ b/mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp @@ -14,52 +14,48 @@ using namespace mlir; using namespace mlir::func; //===----------------------------------------------------------------------===// -// ValueDecomposer +// Helper functions //===----------------------------------------------------------------------===// -void ValueDecomposer::decomposeValue(OpBuilder &builder, Location loc, - Type type, Value value, - SmallVectorImpl &results) { - for (auto &conversion : decomposeValueConversions) - if (conversion(builder, loc, type, value, results)) - return; - results.push_back(value); +/// If the given value can be decomposed with the type converter, decompose it. +/// Otherwise, return the given value. +// TODO: Value decomposition should happen automatically through a 1:N adaptor. +// This function will disappear when the 1:1 and 1:N drivers are merged. +static SmallVector decomposeValue(OpBuilder &builder, Location loc, + Value value, + const TypeConverter *converter) { + // Try to convert the given value's type. If that fails, just return the + // given value. + SmallVector convertedTypes; + if (failed(converter->convertType(value.getType(), convertedTypes))) + return {value}; + if (convertedTypes.empty()) + return {}; + + // If the given value's type is already legal, just return the given value. + TypeRange convertedTypeRange(convertedTypes); + if (convertedTypeRange == TypeRange(value.getType())) + return {value}; + + // Try to materialize a target conversion. If the materialization did not + // produce values of the requested type, the materialization failed. Just + // return the given value in that case. + SmallVector result = converter->materializeTargetConversion( + builder, loc, convertedTypeRange, value); + if (result.empty()) + return {value}; + return result; } -//===----------------------------------------------------------------------===// -// DecomposeCallGraphTypesOpConversionPattern -//===----------------------------------------------------------------------===// - -namespace { -/// Base OpConversionPattern class to make a ValueDecomposer available to -/// inherited patterns. -template -class DecomposeCallGraphTypesOpConversionPattern - : public OpConversionPattern { -public: - DecomposeCallGraphTypesOpConversionPattern(const TypeConverter &typeConverter, - MLIRContext *context, - ValueDecomposer &decomposer, - PatternBenefit benefit = 1) - : OpConversionPattern(typeConverter, context, benefit), - decomposer(decomposer) {} - -protected: - ValueDecomposer &decomposer; -}; -} // namespace - //===----------------------------------------------------------------------===// // DecomposeCallGraphTypesForFuncArgs //===----------------------------------------------------------------------===// namespace { -/// Expand function arguments according to the provided TypeConverter and -/// ValueDecomposer. +/// Expand function arguments according to the provided TypeConverter. struct DecomposeCallGraphTypesForFuncArgs - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(func::FuncOp op, OpAdaptor adaptor, @@ -100,19 +96,22 @@ struct DecomposeCallGraphTypesForFuncArgs //===----------------------------------------------------------------------===// namespace { -/// Expand return operands according to the provided TypeConverter and -/// ValueDecomposer. +/// Expand return operands according to the provided TypeConverter. struct DecomposeCallGraphTypesForReturnOp - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult matchAndRewrite(ReturnOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const final { SmallVector newOperands; - for (Value operand : adaptor.getOperands()) - decomposer.decomposeValue(rewriter, op.getLoc(), operand.getType(), - operand, newOperands); + for (Value operand : adaptor.getOperands()) { + // TODO: We can directly take the values from the adaptor once this is a + // 1:N conversion pattern. + llvm::append_range(newOperands, + decomposeValue(rewriter, operand.getLoc(), operand, + getTypeConverter())); + } rewriter.replaceOpWithNewOp(op, newOperands); return success(); } @@ -124,12 +123,9 @@ struct DecomposeCallGraphTypesForReturnOp //===----------------------------------------------------------------------===// namespace { -/// Expand call op operands and results according to the provided TypeConverter -/// and ValueDecomposer. -struct DecomposeCallGraphTypesForCallOp - : public DecomposeCallGraphTypesOpConversionPattern { - using DecomposeCallGraphTypesOpConversionPattern:: - DecomposeCallGraphTypesOpConversionPattern; +/// Expand call op operands and results according to the provided TypeConverter. +struct DecomposeCallGraphTypesForCallOp : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; LogicalResult matchAndRewrite(CallOp op, OpAdaptor adaptor, @@ -137,9 +133,13 @@ struct DecomposeCallGraphTypesForCallOp // Create the operands list of the new `CallOp`. SmallVector newOperands; - for (Value operand : adaptor.getOperands()) - decomposer.decomposeValue(rewriter, op.getLoc(), operand.getType(), - operand, newOperands); + for (Value operand : adaptor.getOperands()) { + // TODO: We can directly take the values from the adaptor once this is a + // 1:N conversion pattern. + llvm::append_range(newOperands, + decomposeValue(rewriter, operand.getLoc(), operand, + getTypeConverter())); + } // Create the new result types for the new `CallOp` and track the indices in // the new call op's results that correspond to the old call op's results. @@ -189,9 +189,8 @@ struct DecomposeCallGraphTypesForCallOp void mlir::populateDecomposeCallGraphTypesPatterns( MLIRContext *context, const TypeConverter &typeConverter, - ValueDecomposer &decomposer, RewritePatternSet &patterns) { + RewritePatternSet &patterns) { patterns .add(typeConverter, context, - decomposer); + DecomposeCallGraphTypesForReturnOp>(typeConverter, context); } diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp index 7321b19068016c..6de744a7f75244 100644 --- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp +++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp @@ -81,11 +81,10 @@ std::pair getLinearizedMemRefOffsetAndSize( // Adjust linearizedIndices and size by the scale factor (dstBits / srcBits). int64_t scaler = dstBits / srcBits; - addMulMap = addMulMap.floorDiv(scaler); mulMap = mulMap.floorDiv(scaler); OpFoldResult linearizedIndices = affine::makeComposedFoldedAffineApply( - builder, loc, addMulMap, offsetValues); + builder, loc, addMulMap.floorDiv(scaler), offsetValues); OpFoldResult linearizedSize = affine::makeComposedFoldedAffineApply(builder, loc, mulMap, sizes); @@ -95,7 +94,11 @@ std::pair getLinearizedMemRefOffsetAndSize( OpFoldResult adjustBaseOffset = affine::makeComposedFoldedAffineApply( builder, loc, s0.floorDiv(scaler), {offset}); - return {{adjustBaseOffset, linearizedSize}, linearizedIndices}; + OpFoldResult intraVectorOffset = affine::makeComposedFoldedAffineApply( + builder, loc, addMulMap % scaler, offsetValues); + + return {{adjustBaseOffset, linearizedSize, intraVectorOffset}, + linearizedIndices}; } LinearizedMemRefInfo diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index d71a236f62f454..1853ae04f45d90 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -2951,11 +2951,11 @@ void InsertOp::getCanonicalizationPatterns(RewritePatternSet &results, InsertOpConstantFolder>(context); } -// Eliminates insert operations that produce values identical to their source -// value. This happens when the source and destination vectors have identical -// sizes. OpFoldResult vector::InsertOp::fold(FoldAdaptor adaptor) { - if (getNumIndices() == 0) + // Fold "vector.insert %v, %dest [] : vector<2x2xf32> from vector<2x2xf32>" to + // %v. Note: Do not fold "vector.insert %v, %dest [] : f32 into vector" + // (type mismatch). + if (getNumIndices() == 0 && getSourceType() == getType()) return getSource(); return {}; } diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 66362d3ca70fb6..1d6f8a991d9b5b 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" +#include "mlir/Dialect/Utils/StaticValueUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/IR/BuiltinAttributes.h" @@ -22,8 +23,10 @@ #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace mlir; @@ -33,17 +36,22 @@ using namespace mlir; #define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n") /// Returns a compressed mask. The mask value is set only if any mask is present -/// in the scale range. E.g., if `scale` equals to 2, the following mask: +/// in the scale range. E.g., if `scale` equals to 2, and `intraDataOffset` +/// equals to 2, the following mask: /// /// %mask = [1, 1, 1, 0, 0, 0] /// -/// will return the following new compressed mask: +/// will first be padded with number of `intraDataOffset` zeros: +/// %mask = [0, 0, 1, 1, 1, 0, 0, 0] /// -/// %mask = [1, 1, 0] +/// then it will return the following new compressed mask: +/// +/// %mask = [0, 1, 1, 0] static FailureOr getCompressedMaskOp(OpBuilder &rewriter, Location loc, Value mask, - int origElements, int scale) { - auto numElements = (origElements + scale - 1) / scale; + int origElements, int scale, + int intraDataOffset = 0) { + auto numElements = (intraDataOffset + origElements + scale - 1) / scale; Operation *maskOp = mask.getDefiningOp(); SmallVector extractOps; @@ -67,6 +75,9 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, shape.back() = numElements; auto newMaskType = VectorType::get(shape, rewriter.getI1Type()); if (createMaskOp) { + // TODO: handle the case with non-zero intraDataOffset for CreateMaskOp. + if (intraDataOffset != 0) + return failure(); OperandRange maskOperands = createMaskOp.getOperands(); size_t numMaskOperands = maskOperands.size(); AffineExpr s0; @@ -86,11 +97,27 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, ArrayRef maskDimSizes = constantMaskOp.getMaskDimSizes(); size_t numMaskOperands = maskDimSizes.size(); int64_t origIndex = maskDimSizes[numMaskOperands - 1]; - int64_t maskIndex = (origIndex + scale - 1) / scale; + int64_t startIndex = intraDataOffset / scale; + int64_t maskIndex = llvm::divideCeil(intraDataOffset + origIndex, scale); + + // TODO: we only want the mask between [startIndex, maskIndex] to be true, + // the rest are false. + if (intraDataOffset != 0 && maskDimSizes.size() > 1) + return failure(); + SmallVector newMaskDimSizes(maskDimSizes.drop_back()); newMaskDimSizes.push_back(maskIndex); - newMask = rewriter.create(loc, newMaskType, - newMaskDimSizes); + + if (intraDataOffset == 0) { + newMask = rewriter.create(loc, newMaskType, + newMaskDimSizes); + } else { + SmallVector newMaskValues; + for (int64_t i = 0; i < numElements; ++i) + newMaskValues.push_back(i >= startIndex && i < maskIndex); + auto denseAttr = DenseElementsAttr::get(newMaskType, newMaskValues); + newMask = rewriter.create(loc, newMaskType, denseAttr); + } } while (!extractOps.empty()) { @@ -102,6 +129,26 @@ static FailureOr getCompressedMaskOp(OpBuilder &rewriter, return newMask; } +static Value extractSubvectorFrom(RewriterBase &rewriter, Location loc, + VectorType extractType, Value vector, + int64_t frontOffset, int64_t subvecSize) { + auto offsets = rewriter.getI64ArrayAttr({frontOffset}); + auto sizes = rewriter.getI64ArrayAttr({subvecSize}); + auto strides = rewriter.getI64ArrayAttr({1}); + return rewriter + .create(loc, extractType, vector, offsets, + sizes, strides) + ->getResult(0); +} + +static Value insertSubvectorInto(RewriterBase &rewriter, Location loc, + Value src, Value dest, int64_t offset) { + auto offsets = rewriter.getI64ArrayAttr({offset}); + auto strides = rewriter.getI64ArrayAttr({1}); + return rewriter.create(loc, dest.getType(), src, + dest, offsets, strides); +} + namespace { //===----------------------------------------------------------------------===// @@ -201,7 +248,8 @@ struct ConvertVectorMaskedStore final auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndicesOfr; - std::tie(std::ignore, linearizedIndicesOfr) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndicesOfr) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -214,19 +262,19 @@ struct ConvertVectorMaskedStore final // Load the whole data and use arith.select to handle the corner cases. // E.g., given these input values: // - // %mask = [1, 1, 1, 0, 0, 0] - // %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6] - // %value_to_store = [0x7, 0x8, 0x9, 0xA, 0xB, 0xC] + // %mask = [0, 1, 1, 1, 1, 1, 0, 0] + // %0[%c0, %c0] contains [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8] + // %value_to_store = [0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0] // // we'll have // - // expected output: [0x7, 0x8, 0x9, 0x4, 0x5, 0x6] + // expected output: [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x7, 0x8] // - // %new_mask = [1, 1, 0] - // %maskedload = [0x12, 0x34, 0x0] - // %bitcast = [0x1, 0x2, 0x3, 0x4, 0x0, 0x0] - // %select_using_original_mask = [0x7, 0x8, 0x9, 0x4, 0x0, 0x0] - // %packed_data = [0x78, 0x94, 0x00] + // %new_mask = [1, 1, 1, 0] + // %maskedload = [0x12, 0x34, 0x56, 0x00] + // %bitcast = [0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x0, 0x0] + // %select_using_shifted_mask = [0x1, 0xA, 0xB, 0xC, 0xD, 0xE, 0x0, 0x0] + // %packed_data = [0x1A, 0xBC, 0xDE, 0x00] // // Using the new mask to store %packed_data results in expected output. FailureOr newMask = @@ -243,8 +291,9 @@ struct ConvertVectorMaskedStore final loc, newType, adaptor.getBase(), linearizedIndices, newMask.value()->getResult(0), passThru); - Value valueToStore = rewriter.create( - loc, op.getValueToStore().getType(), newLoad); + auto newBitCastType = VectorType::get(numElements * scale, oldElementType); + Value valueToStore = + rewriter.create(loc, newBitCastType, newLoad); valueToStore = rewriter.create( loc, op.getMask(), op.getValueToStore(), valueToStore); valueToStore = @@ -294,19 +343,31 @@ struct ConvertVectorLoad final : OpConversionPattern { // %1 = vector.load %0[%linear_index] : memref<6xi8>, vector<2xi8> // %2 = vector.bitcast %1 : vector<2xi8> to vector<4xi4> // - // TODO: Currently, only the even number of elements loading is supported. - // To deal with the odd number of elements, one has to extract the - // subvector at the proper offset after bit-casting. + // There are cases where the number of elements to load is not byte-aligned, + // for example: + // + // %1 = vector.load %0[%c1, %c0] : memref<3x3xi2>, vector<3xi2> + // + // we will have to load extra bytes and extract the exact slice in between. + // + // %1 = vector.load %0[%c2] : memref<3xi8>, vector<2xi8> + // %2 = vector.bitcast %1 : vector<2xi8> to vector<8xi2> + // %3 = vector.extract_strided_slice %1 {offsets = [2], sizes = [3], strides + // = [1]} + // : vector<8xi2> to vector<3xi2> + // + // TODO: Currently the extract_strided_slice's attributes must be known at + // compile time as they must be constants. auto origElements = op.getVectorType().getNumElements(); - if (origElements % scale != 0) - return failure(); + bool isUnalignedEmulation = origElements % scale != 0; auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -314,15 +375,31 @@ struct ConvertVectorLoad final : OpConversionPattern { stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); - auto numElements = (origElements + scale - 1) / scale; + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic intra vector offset + return failure(); + } + + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); auto newLoad = rewriter.create( loc, VectorType::get(numElements, newElementType), adaptor.getBase(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices)); - auto bitCast = - rewriter.create(loc, op.getType(), newLoad); + Value result = rewriter.create( + loc, VectorType::get(numElements * scale, oldElementType), newLoad); - rewriter.replaceOp(op, bitCast->getResult(0)); + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + + rewriter.replaceOp(op, result); return success(); } }; @@ -396,13 +473,13 @@ struct ConvertVectorMaskedLoad final // subvector at the proper offset after bit-casting. auto origType = op.getVectorType(); auto origElements = origType.getNumElements(); - if (origElements % scale != 0) - return failure(); + bool isUnalignedEmulation = origElements % scale != 0; auto stridedMetadata = rewriter.create(loc, op.getBase()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -410,29 +487,68 @@ struct ConvertVectorMaskedLoad final stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic intra vector offset + return failure(); + } + FailureOr newMask = - getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale); + getCompressedMaskOp(rewriter, loc, op.getMask(), origElements, scale, + *foldedIntraVectorOffset); if (failed(newMask)) return failure(); - auto numElements = (origElements + scale - 1) / scale; - auto newType = VectorType::get(numElements, newElementType); + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); + auto loadType = VectorType::get(numElements, newElementType); + auto newBitcastType = VectorType::get(numElements * scale, oldElementType); + + Value passthru = op.getPassThru(); + if (isUnalignedEmulation) { + // create an empty vector of the new type + auto emptyVector = rewriter.create( + loc, newBitcastType, rewriter.getZeroAttr(newBitcastType)); + passthru = insertSubvectorInto(rewriter, loc, passthru, emptyVector, + *foldedIntraVectorOffset); + } auto newPassThru = - rewriter.create(loc, newType, op.getPassThru()); + rewriter.create(loc, loadType, passthru); // Generating the new masked load. auto newLoad = rewriter.create( - loc, newType, adaptor.getBase(), + loc, loadType, adaptor.getBase(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices), newMask.value()->getResult(0), newPassThru); // Setting the part that originally was not effectively loaded from memory // to pass through. auto bitCast = - rewriter.create(loc, op.getType(), newLoad); - auto select = rewriter.create(loc, op.getMask(), bitCast, - op.getPassThru()); - rewriter.replaceOp(op, select->getResult(0)); + rewriter.create(loc, newBitcastType, newLoad); + + Value mask = op.getMask(); + if (isUnalignedEmulation) { + auto newSelectMaskType = + VectorType::get(numElements * scale, rewriter.getI1Type()); + // TODO: can fold if op's mask is constant + auto emptyVector = rewriter.create( + loc, newSelectMaskType, rewriter.getZeroAttr(newSelectMaskType)); + mask = insertSubvectorInto(rewriter, loc, op.getMask(), emptyVector, + *foldedIntraVectorOffset); + } + + Value result = + rewriter.create(loc, mask, bitCast, passthru); + + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + rewriter.replaceOp(op, result); return success(); } @@ -464,8 +580,8 @@ struct ConvertVectorTransferRead final int scale = dstBits / srcBits; auto origElements = op.getVectorType().getNumElements(); - if (origElements % scale != 0) - return failure(); + + bool isUnalignedEmulation = origElements % scale != 0; auto newPadding = rewriter.create(loc, newElementType, adaptor.getPadding()); @@ -474,7 +590,8 @@ struct ConvertVectorTransferRead final rewriter.create(loc, op.getSource()); OpFoldResult linearizedIndices; - std::tie(std::ignore, linearizedIndices) = + memref::LinearizedMemRefInfo linearizedInfo; + std::tie(linearizedInfo, linearizedIndices) = memref::getLinearizedMemRefOffsetAndSize( rewriter, loc, srcBits, dstBits, stridedMetadata.getConstifiedMixedOffset(), @@ -482,18 +599,34 @@ struct ConvertVectorTransferRead final stridedMetadata.getConstifiedMixedStrides(), getAsOpFoldResult(adaptor.getIndices())); - auto numElements = (origElements + scale - 1) / scale; - auto newReadType = VectorType::get(numElements, newElementType); + std::optional foldedIntraVectorOffset = + isUnalignedEmulation + ? getConstantIntValue(linearizedInfo.intraDataOffset) + : 0; + + if (!foldedIntraVectorOffset) { + // unimplemented case for dynamic inra-vector offset + return failure(); + } + + auto numElements = + llvm::divideCeil(*foldedIntraVectorOffset + origElements, scale); auto newRead = rewriter.create( - loc, newReadType, adaptor.getSource(), + loc, VectorType::get(numElements, newElementType), adaptor.getSource(), getValueOrCreateConstantIndexOp(rewriter, loc, linearizedIndices), newPadding); - auto bitCast = - rewriter.create(loc, op.getType(), newRead); + auto bitCast = rewriter.create( + loc, VectorType::get(numElements * scale, oldElementType), newRead); + + Value result = bitCast->getResult(0); + if (isUnalignedEmulation) { + result = extractSubvectorFrom(rewriter, loc, op.getType(), result, + *foldedIntraVectorOffset, origElements); + } + rewriter.replaceOp(op, result); - rewriter.replaceOp(op, bitCast->getResult(0)); return success(); } }; diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 44cf8331d55a73..0a62628b9ad240 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2457,11 +2457,11 @@ legalizeUnresolvedMaterialization(RewriterBase &rewriter, } } - InFlightDiagnostic diag = op->emitError() - << "failed to legalize unresolved materialization " - "from (" - << inputOperands.getTypes() << ") to " << outputType - << " that remained live after conversion"; + InFlightDiagnostic diag = + op->emitError() << "failed to legalize unresolved materialization " + "from (" + << inputOperands.getTypes() << ") to (" << outputType + << ") that remained live after conversion"; diag.attachNote(op->getUsers().begin()->getLoc()) << "see existing live user here: " << *op->getUsers().begin(); return failure(); diff --git a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir index f4722da08cc40f..f5ef821cc9c058 100644 --- a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir +++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir @@ -1,28 +1,35 @@ // RUN: mlir-opt -convert-memref-to-emitc %s -split-input-file | FileCheck %s -// CHECK-LABEL: memref_store -// CHECK-SAME: %[[v:.*]]: f32, %[[i:.*]]: index, %[[j:.*]]: index -func.func @memref_store(%v : f32, %i: index, %j: index) { - // CHECK-NEXT: %[[ALLOCA:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.array<4x8xf32> - %0 = memref.alloca() : memref<4x8xf32> +// CHECK-LABEL: alloca() +func.func @alloca() { + // CHECK-NEXT: %[[ALLOCA:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.array<2xf32> + %0 = memref.alloca() : memref<2xf32> + return +} - // CHECK-NEXT: %[[SUBSCRIPT:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> !emitc.lvalue +// ----- + +// CHECK-LABEL: memref_store +// CHECK-SAME: %[[buff:.*]]: memref<4x8xf32>, %[[v:.*]]: f32, %[[i:.*]]: index, %[[j:.*]]: index +func.func @memref_store(%buff : memref<4x8xf32>, %v : f32, %i: index, %j: index) { + // CHECK-NEXT: %[[BUFFER:.*]] = builtin.unrealized_conversion_cast %[[buff]] : memref<4x8xf32> to !emitc.array<4x8xf32> + + // CHECK-NEXT: %[[SUBSCRIPT:.*]] = emitc.subscript %[[BUFFER]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> !emitc.lvalue // CHECK-NEXT: emitc.assign %[[v]] : f32 to %[[SUBSCRIPT]] : - memref.store %v, %0[%i, %j] : memref<4x8xf32> + memref.store %v, %buff[%i, %j] : memref<4x8xf32> return } // ----- // CHECK-LABEL: memref_load -// CHECK-SAME: %[[i:.*]]: index, %[[j:.*]]: index -func.func @memref_load(%i: index, %j: index) -> f32 { - // CHECK-NEXT: %[[ALLOCA:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.array<4x8xf32> - %0 = memref.alloca() : memref<4x8xf32> - - // CHECK-NEXT: %[[SUBSCRIPT:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> !emitc.lvalue +// CHECK-SAME: %[[buff:.*]]: memref<4x8xf32>, %[[i:.*]]: index, %[[j:.*]]: index +func.func @memref_load(%buff : memref<4x8xf32>, %i: index, %j: index) -> f32 { + // CHECK-NEXT: %[[BUFFER:.*]] = builtin.unrealized_conversion_cast %[[buff]] : memref<4x8xf32> to !emitc.array<4x8xf32> + + // CHECK-NEXT: %[[SUBSCRIPT:.*]] = emitc.subscript %[[BUFFER]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> !emitc.lvalue // CHECK-NEXT: %[[LOAD:.*]] = emitc.load %[[SUBSCRIPT]] : - %1 = memref.load %0[%i, %j] : memref<4x8xf32> + %1 = memref.load %buff[%i, %j] : memref<4x8xf32> // CHECK-NEXT: return %[[LOAD]] : f32 return %1 : f32 } diff --git a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir index 992203153939fe..3cacbd0044f825 100644 --- a/mlir/test/Dialect/AMX/legalize-for-llvm.mlir +++ b/mlir/test/Dialect/AMX/legalize-for-llvm.mlir @@ -43,3 +43,31 @@ func.func @mulf(%arg0: memref, %arg1: memref) { amx.tile_store %arg1[%0, %0], %4 : memref, vector<16x16xf32> return } + +// CHECK-LABEL: strides( +// CHECK: %[[CST_64_1:.+]] = llvm.mlir.constant(64 : i64) : i64 +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_1]] +// CHECK: %[[CST_128_1:.+]] = llvm.mlir.constant(128 : i64) : i64 +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_1]] +// CHECK: llvm.mlir.constant(2 : i64) : i64 +// CHECK: llvm.extractvalue %{{.+}}[4, 0] +// CHECK: %[[STRIDE_1:.+]] = llvm.mul +// CHECK: "amx.tileloadd64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_1]] +// CHECK: %[[CST_64_2:.+]] = llvm.mlir.constant(64 : i64) : i64 +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_64_2]] +// CHECK: %[[CST_128_2:.+]] = llvm.mlir.constant(128 : i64) : i64 +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[CST_128_2]] +// CHECK: llvm.mlir.constant(2 : i64) : i64 +// CHECK: llvm.extractvalue %{{.+}}[4, 0] +// CHECK: %[[STRIDE_2:.+]] = llvm.mul +// CHECK: "amx.tilestored64"(%{{.+}}, %{{.+}}, %{{.+}}, %[[STRIDE_2]] +func.func @strides(%arg0: memref<16x32xbf16>, %arg1: memref<16x32xbf16, strided<[64, 1]>>, %arg2: memref<16x32xbf16, strided<[?, 1]>>) { + %0 = arith.constant 0 : index + %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16> into vector<16x32xbf16> + %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16, strided<[64, 1]>> into vector<16x32xbf16> + %3 = amx.tile_load %arg2[%0, %0] : memref<16x32xbf16, strided<[?, 1]>> into vector<16x32xbf16> + amx.tile_store %arg0[%0, %0], %3 : memref<16x32xbf16>, vector<16x32xbf16> + amx.tile_store %arg1[%0, %0], %1 : memref<16x32xbf16, strided<[64, 1]>>, vector<16x32xbf16> + amx.tile_store %arg2[%0, %0], %2 : memref<16x32xbf16, strided<[?, 1]>>, vector<16x32xbf16> + return +} diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir index ab18ce05e355d3..bae94c1be4da90 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir @@ -78,7 +78,7 @@ func.func @static_layout_to_no_layout_cast(%m: memref) -> memref> { %0 = bufferization.to_tensor %m : memref - // expected-error @+1 {{failed to legalize unresolved materialization from ('memref') to 'memref>' that remained live after conversion}} + // expected-error @+1 {{failed to legalize unresolved materialization from ('memref') to ('memref>') that remained live after conversion}} %1 = bufferization.to_memref %0 : memref> // expected-note @below{{see existing live user here}} return %1 : memref> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir index ee0f71f668dc74..2829eafb7c1c59 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -1,11 +1,5 @@ // RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="bufferize-function-boundaries=1" -split-input-file -verify-diagnostics -// expected-error @+2 {{cannot bufferize bodiless function that returns a tensor}} -// expected-error @+1 {{failed to bufferize op}} -func.func private @foo() -> tensor - -// ----- - // expected-error @+1 {{cannot bufferize a FuncOp with tensors and without a unique ReturnOp}} func.func @swappy(%cond1 : i1, %cond2 : i1, %t1 : tensor, %t2 : tensor) -> (tensor, tensor) @@ -123,17 +117,6 @@ func.func @to_tensor_op_unsupported(%m: memref, %idx: index) -> (f32) { // ----- -// expected-error @+2 {{failed to bufferize op}} -// expected-error @+1 {{cannot bufferize bodiless function that returns a tensor}} -func.func private @foo(%t : tensor) -> (f32, tensor, f32) - -func.func @call_to_unknown_tensor_returning_func(%t : tensor) { - call @foo(%t) : (tensor) -> (f32, tensor, f32) - return -} - -// ----- - func.func @yield_alloc_dominance_test_2(%cst : f32, %idx : index, %idx2 : index) -> f32 { %1 = bufferization.alloc_tensor(%idx) : tensor diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir index 0d5224514e3a02..d31b43477beb9f 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -42,6 +42,21 @@ func.func private @external_func_with_return_val(tensor<4xi32>) -> f32 // ----- +// Bufferization of bodiless function that returns a tensor. + +// CHECK: func.func private @foo(memref>) -> (f32, memref>, f32) +func.func private @foo(%t : tensor) -> (f32, tensor, f32) + +// CHECK: func.func @call_to_unknown_tensor_returning_func( +// CHECK-SAME: %[[arg0:.*]]: memref>) { +func.func @call_to_unknown_tensor_returning_func(%t : tensor) { + // CHECK: call @foo(%[[arg0]]) : (memref>) -> (f32, memref>, f32) + call @foo(%t) : (tensor) -> (f32, tensor, f32) + return +} + +// ----- + // A function that returns a non-equivalent tensor with layout map. // CHECK-LABEL: func @return_extract_slice(%{{.*}}) -> memref<2x?xf32, strided<[10, 1], offset: ?>> diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 4bc2ed140da91a..5de007b390c51d 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1232,3 +1232,20 @@ func.func @transpose_buffer(%input: memref, // CHECK-SAME: %[[VAL_1:.*]]: memref) { // CHECK: linalg.transpose ins(%[[VAL_0]] : memref) // CHECK-SAME: outs(%[[VAL_1]] : memref) permutation = [0] + +// ----- + +// This test checks linalg op has a recursive memory effect. Otherwise +// linalg.map without a user would be DCEd. +func.func @recursive_effect(%arg : tensor<1xf32>) { + %init = arith.constant dense<0.0> : tensor<1xf32> + %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>) + (%in : f32) { + vector.print %in : f32 + linalg.yield %in : f32 + } + func.return +} + +// CHECK-LABEL: @recursive_effect +// CHECK: linalg.map diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 6d6bc199e601c0..c963460e7259fb 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -800,6 +800,43 @@ func.func @fold_extract_shapecast_to_shapecast(%arg0 : vector<3x4xf32>) -> vecto // ----- +// CHECK-LABEL: func @extract_no_fold_scalar_to_0d( +// CHECK-SAME: %[[v:.*]]: vector) +// CHECK: %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector +// CHECK: return %[[extract]] +func.func @extract_no_fold_scalar_to_0d(%v: vector) -> f32 { + %0 = vector.extract %v[] : f32 from vector + return %0 : f32 +} + +// ----- + +// CHECK-LABEL: func @insert_fold_same_rank( +// CHECK-SAME: %[[v:.*]]: vector<2x2xf32>) +// CHECK: %[[CST:.+]] = arith.constant +// CHECK-SAME: : vector<2x2xf32> +// CHECK-NOT: vector.insert +// CHECK: return %[[CST]] +func.func @insert_fold_same_rank(%v: vector<2x2xf32>) -> vector<2x2xf32> { + %cst = arith.constant dense<0.000000e+00> : vector<2x2xf32> + %0 = vector.insert %cst, %v [] : vector<2x2xf32> into vector<2x2xf32> + return %0 : vector<2x2xf32> +} + +// ----- + +// CHECK-LABEL: func @insert_no_fold_scalar_to_0d( +// CHECK-SAME: %[[v:.*]]: vector) +// CHECK: %[[extract:.*]] = vector.insert %{{.*}}, %[[v]] [] : f32 into vector +// CHECK: return %[[extract]] +func.func @insert_no_fold_scalar_to_0d(%v: vector) -> vector { + %cst = arith.constant 0.000000e+00 : f32 + %0 = vector.insert %cst, %v [] : f32 into vector + return %0 : vector +} + +// ----- + // CHECK-LABEL: dont_fold_expand_collapse // CHECK: %[[A:.*]] = vector.shape_cast %{{.*}} : vector<1x1x64xf32> to vector<1x1x8x8xf32> // CHECK: %[[B:.*]] = vector.shape_cast %{{.*}} : vector<1x1x8x8xf32> to vector<8x8xf32> @@ -2606,17 +2643,6 @@ func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi3 // ----- -// CHECK-LABEL: func @extract_from_0d_regression( -// CHECK-SAME: %[[v:.*]]: vector) -// CHECK: %[[extract:.*]] = vector.extract %[[v]][] : f32 from vector -// CHECK: return %[[extract]] -func.func @extract_from_0d_regression(%v: vector) -> f32 { - %0 = vector.extract %v[] : f32 from vector - return %0 : f32 -} - -// ----- - // CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression( // CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: vector, %[[c:.*]]: vector<2xf32>) func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir new file mode 100644 index 00000000000000..7ecbad7968225d --- /dev/null +++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type-unaligned.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-opt --test-emulate-narrow-int="arith-compute-bitwidth=1 memref-load-bitwidth=8" --cse --split-input-file %s | FileCheck %s + +func.func @vector_load_i2(%arg1: index, %arg2: index) -> vector<3x3xi2> { + %0 = memref.alloc() : memref<3x3xi2> + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %cst = arith.constant dense<0> : vector<3x3xi2> + %1 = vector.load %0[%c2, %c0] : memref<3x3xi2>, vector<3xi2> + %2 = vector.insert %1, %cst [0] : vector<3xi2> into vector<3x3xi2> + return %2 : vector<3x3xi2> +} + +// CHECK: func @vector_load_i2 +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> +// CHECK: %[[INDEX:.+]] = arith.constant 1 : index +// CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<3xi8>, vector<2xi8> +// CHECK: %[[VEC_I2:.+]] = vector.bitcast %[[VEC]] : vector<2xi8> to vector<8xi2> +// CHECK: %[[EXCTRACT:.+]] = vector.extract_strided_slice %[[VEC_I2]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2> + +//----- + +func.func @vector_transfer_read_i2() -> vector<3xi2> { + %0 = memref.alloc() : memref<3x3xi2> + %c0i2 = arith.constant 0 : i2 + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %1 = vector.transfer_read %0[%c2, %c0], %c0i2 {in_bounds = [true]} : memref<3x3xi2>, vector<3xi2> + return %1 : vector<3xi2> +} + +// CHECK: func @vector_transfer_read_i2 +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> +// CHECK: %[[INDEX:.+]] = arith.constant 1 : index +// CHECK: %[[READ:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %0 : memref<3xi8>, vector<2xi8> +// CHECK: %[[BITCAST:.+]] = vector.bitcast %[[READ]] : vector<2xi8> to vector<8xi2> +// CHECK: vector.extract_strided_slice %[[BITCAST]] {offsets = [2], sizes = [3], strides = [1]} : vector<8xi2> to vector<3xi2> + +//----- + +func.func @vector_cst_maskedload_i2(%passthru: vector<5xi2>) -> vector<3x5xi2> { + %0 = memref.alloc() : memref<3x5xi2> + %cst = arith.constant dense<0> : vector<3x5xi2> + %mask = vector.constant_mask [3] : vector<5xi1> + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %1 = vector.maskedload %0[%c2, %c0], %mask, %passthru : + memref<3x5xi2>, vector<5xi1>, vector<5xi2> into vector<5xi2> + %2 = vector.insert %1, %cst [0] : vector<5xi2> into vector<3x5xi2> + return %2 : vector<3x5xi2> +} + +// CHECK: func @vector_cst_maskedload_i2 +// CHECK: %[[ORIGINMASK:.+]] = vector.constant_mask [3] : vector<5xi1> +// CHECK: %[[NEWMASK:.+]] = arith.constant dense : vector<2xi1> +// CHECK: %[[VESSEL:.+]] = arith.constant dense<0> : vector<8xi2> +// CHECK: %[[INSERT1:.+]] = vector.insert_strided_slice %arg0, %[[VESSEL]] +// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi2> into vector<8xi2> +// CHECK: %[[BITCAST1:.+]] = vector.bitcast %[[INSERT1]] : vector<8xi2> to vector<2xi8> +// CHECK: %[[C2:.+]] = arith.constant 2 : index +// CHECK: %[[MASKEDLOAD:.+]] = vector.maskedload %alloc[%[[C2]]], %[[NEWMASK:.+]], %[[BITCAST1]] +// CHECK-SAME: : memref<4xi8>, vector<2xi1>, vector<2xi8> into vector<2xi8> +// CHECK: %[[BITCAST2:.+]] = vector.bitcast %[[MASKEDLOAD]] : vector<2xi8> to vector<8xi2> +// CHECK: %[[CST2:.+]] = arith.constant dense : vector<8xi1> +// CHECK: %[[INSERT2:.+]] = vector.insert_strided_slice %[[ORIGINMASK]], %[[CST2]] +// CHECK-SAME: {offsets = [2], strides = [1]} : vector<5xi1> into vector<8xi1> +// CHECK: %[[SELECT:.+]] = arith.select %[[INSERT2]], %[[BITCAST2]], %[[INSERT1]] : vector<8xi1>, vector<8xi2> +// CHECK: vector.extract_strided_slice %[[SELECT]] {offsets = [2], sizes = [5], strides = [1]} : vector<8xi2> to vector<5xi2> diff --git a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir index 6e8f0162e505d0..031442b0ee2daf 100644 --- a/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir +++ b/mlir/test/Transforms/test-legalize-erased-op-with-uses.mlir @@ -3,7 +3,7 @@ // Test that an error is emitted when an operation is marked as "erased", but // has users that live across the conversion. func.func @remove_all_ops(%arg0: i32) -> i32 { - // expected-error@below {{failed to legalize unresolved materialization from () to 'i32' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from () to ('i32') that remained live after conversion}} %0 = "test.illegal_op_a"() : () -> i32 // expected-note@below {{see existing live user here}} return %0 : i32 diff --git a/mlir/test/Transforms/test-legalize-type-conversion.mlir b/mlir/test/Transforms/test-legalize-type-conversion.mlir index f130adff42f8cd..db8bd0f6378d29 100644 --- a/mlir/test/Transforms/test-legalize-type-conversion.mlir +++ b/mlir/test/Transforms/test-legalize-type-conversion.mlir @@ -2,7 +2,7 @@ func.func @test_invalid_arg_materialization( - // expected-error@below {{failed to legalize unresolved materialization from () to 'i16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from () to ('i16') that remained live after conversion}} %arg0: i16) { // expected-note@below{{see existing live user here}} "foo.return"(%arg0) : (i16) -> () @@ -21,7 +21,7 @@ func.func @test_valid_arg_materialization(%arg0: i64) { // ----- func.func @test_invalid_result_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -30,7 +30,7 @@ func.func @test_invalid_result_materialization() { // ----- func.func @test_invalid_result_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -50,7 +50,7 @@ func.func @test_transitive_use_materialization() { // ----- func.func @test_transitive_use_invalid_materialization() { - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f16' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f16') that remained live after conversion}} %result = "test.another_type_producer"() : () -> f16 // expected-note@below{{see existing live user here}} "foo.return"(%result) : (f16) -> () @@ -102,7 +102,7 @@ func.func @test_block_argument_not_converted() { // Make sure argument type changes aren't implicitly forwarded. func.func @test_signature_conversion_no_converter() { "test.signature_conversion_no_converter"() ({ - // expected-error@below {{failed to legalize unresolved materialization from ('f64') to 'f32' that remained live after conversion}} + // expected-error@below {{failed to legalize unresolved materialization from ('f64') to ('f32') that remained live after conversion}} ^bb0(%arg0: f32): "test.type_consumer"(%arg0) : (f32) -> () // expected-note@below{{see existing live user here}} diff --git a/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp b/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp index 92216da9f201e6..de511c58ae6ee0 100644 --- a/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp +++ b/mlir/test/lib/Dialect/Func/TestDecomposeCallGraphTypes.cpp @@ -21,23 +21,40 @@ namespace { /// given tuple value. If some tuple elements are, in turn, tuples, the elements /// of those are extracted recursively such that the returned values have the /// same types as `resultTypes.getFlattenedTypes()`. -static LogicalResult buildDecomposeTuple(OpBuilder &builder, Location loc, - TupleType resultType, Value value, - SmallVectorImpl &values) { - for (unsigned i = 0, e = resultType.size(); i < e; ++i) { - Type elementType = resultType.getType(i); - Value element = builder.create( - loc, elementType, value, builder.getI32IntegerAttr(i)); - if (auto nestedTupleType = dyn_cast(elementType)) { - // Recurse if the current element is also a tuple. - if (failed(buildDecomposeTuple(builder, loc, nestedTupleType, element, - values))) - return failure(); - } else { - values.push_back(element); +static SmallVector buildDecomposeTuple(OpBuilder &builder, + TypeRange resultTypes, + ValueRange inputs, Location loc) { + // Skip materialization if the single input value is not a tuple. + if (inputs.size() != 1) + return {}; + Value tuple = inputs.front(); + auto tupleType = dyn_cast(tuple.getType()); + if (!tupleType) + return {}; + // Skip materialization if the flattened types do not match the requested + // result types. + SmallVector flattenedTypes; + tupleType.getFlattenedTypes(flattenedTypes); + if (TypeRange(resultTypes) != TypeRange(flattenedTypes)) + return {}; + // Recursively decompose the tuple. + SmallVector result; + std::function decompose = [&](Value tuple) { + auto tupleType = dyn_cast(tuple.getType()); + if (!tupleType) { + // This is not a tuple. + result.push_back(tuple); + return; } - } - return success(); + for (unsigned i = 0, e = tupleType.size(); i < e; ++i) { + Type elementType = tupleType.getType(i); + Value element = builder.create( + loc, elementType, tuple, builder.getI32IntegerAttr(i)); + decompose(element); + } + }; + decompose(tuple); + return result; } /// Creates a `test.make_tuple` op out of the given inputs building a tuple of @@ -82,8 +99,8 @@ static Value buildMakeTupleOp(OpBuilder &builder, TupleType resultType, /// A pass for testing call graph type decomposition. /// -/// This instantiates the patterns with a TypeConverter and ValueDecomposer -/// that splits tuple types into their respective element types. +/// This instantiates the patterns with a TypeConverter that splits tuple types +/// into their respective element types. /// For example, `tuple --> T1, T2, T3`. struct TestDecomposeCallGraphTypes : public PassWrapper> { @@ -123,12 +140,9 @@ struct TestDecomposeCallGraphTypes return success(); }); typeConverter.addArgumentMaterialization(buildMakeTupleOp); + typeConverter.addTargetMaterialization(buildDecomposeTuple); - ValueDecomposer decomposer; - decomposer.addDecomposeValueConversion(buildDecomposeTuple); - - populateDecomposeCallGraphTypesPatterns(context, typeConverter, decomposer, - patterns); + populateDecomposeCallGraphTypesPatterns(context, typeConverter, patterns); if (failed(applyPartialConversion(module, target, std::move(patterns)))) return signalPassFailure(); diff --git a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp index 411a98a48bfb28..525c8d6d3e89bc 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRIntrinsicGen.cpp @@ -76,7 +76,7 @@ static IndicesTy getOverloadableTypeIdxs(const Record &record, case llvm::MVT::iAny: case llvm::MVT::fAny: case llvm::MVT::Any: - case llvm::MVT::iPTRAny: + case llvm::MVT::pAny: case llvm::MVT::vAny: overloadedOps.set(r.index()); break; diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index db928deb170602..5e756e2ed9629f 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -364,6 +364,20 @@ gentbl( ], ) +gentbl( + name="basic_builtins_x86_gen", + tbl_outs = [( + "-gen-clang-builtins", + "include/clang/Basic/BuiltinsX86.inc", + )], + tblgen = ":clang-tblgen", + td_file = "include/clang/Basic/BuiltinsX86.td", + td_srcs = [ + "include/clang/Basic/BuiltinsX86.td", + "include/clang/Basic/BuiltinsBase.td", + ], +) + gentbl( name = "basic_builtins_gen", tbl_outs = [( @@ -701,6 +715,7 @@ cc_library( ":basic_builtins_bpf_gen", ":basic_builtins_gen", ":basic_builtins_riscv_gen", + ":basic_builtins_x86_gen", ":basic_internal_headers", ":basic_riscv_sifive_vector_builtins_gen", ":basic_riscv_vector_builtin_cg_gen",