diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index f7cc9d19123635..b15d428326ac12 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -271,7 +271,6 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
   // specialized template. Implicit ones are filtered out by RAV.
   bool
   VisitClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *CTSD) {
-    // if (CTSD->isExplicitSpecialization())
     if (clang::isTemplateExplicitInstantiationOrSpecialization(
             CTSD->getTemplateSpecializationKind()))
       report(CTSD->getLocation(),
@@ -279,7 +278,6 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
     return true;
   }
   bool VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *VTSD) {
-    // if (VTSD->isExplicitSpecialization())
     if (clang::isTemplateExplicitInstantiationOrSpecialization(
             VTSD->getTemplateSpecializationKind()))
       report(VTSD->getLocation(),
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1541b0cbf4875c..b1864901e7bddb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -65,9 +65,20 @@ C++ Specific Potentially Breaking Changes
   `-Wno-enum-constexpr-conversion`, to allow for a transition period for users.
   Now, in Clang 20, **it is no longer possible to suppress the diagnostic**.
 
+- Extraneous template headers are now ill-formed by default.
+  This error can be disabled with ``-Wno-error=extraneous-template-head``.
+
+  .. code-block:: c++
+
+    template <> // error: extraneous template head
+    template <typename T>
+    void f();
+    
 ABI Changes in This Version
 ---------------------------
 
+- Fixed Microsoft name mangling of placeholder (``auto`` and ``decltype(auto)``) return types for MSVC 1920+. This change resolves incompatibilities with code compiled by MSVC 1920+ but will introduce incompatibilities with code compiled by earlier versions of Clang unless such code is built with the compiler option ``-fms-compatibility-version=19.14`` to imitate the MSVC 1914 mangling behavior.
+
 AST Dumping Potentially Breaking Changes
 ----------------------------------------
 
@@ -383,6 +394,36 @@ Moved checkers
 Sanitizers
 ----------
 
+- Added the ``-fsanitize-overflow-pattern-exclusion=`` flag which can be used
+  to disable sanitizer instrumentation for specific overflow-dependent code
+  patterns. The supported patterns are: ``add-overflow-test``,
+  ``negated-unsigned-const``, and ``post-decr-while``. Instrumentation can be
+  turned off for all of these patterns at once by specifying ``all``;
+  conversely, ``none`` disables all exclusions.
+
+  .. code-block:: c++
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=add-overflow-test``
+     int common_overflow_check_pattern(unsigned base, unsigned offset) {
+       if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, won't be instrumented
+     }
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=negated-unsigned-const``
+     void negation_overflow() {
+       unsigned long foo = -1UL; // No longer causes a negation overflow warning
+       unsigned long bar = -2UL; // and so on...
+     }
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=post-decr-while``
+     void while_post_decrement() {
+       unsigned char count = 16;
+       while (count--) { /* ... */} // No longer causes unsigned-integer-overflow sanitizer to trip
+     }
+
+  Many existing projects have a large amount of these code patterns present.
+  This new flag should allow those projects to enable integer sanitizers with
+  less noise.
+
 Python Binding Changes
 ----------------------
 - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``.
diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 531d56e313826c..9f3d980eefbea7 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -293,6 +293,48 @@ To silence reports from unsigned integer overflow, you can set
 ``-fsanitize-recover=unsigned-integer-overflow``, is particularly useful for
 providing fuzzing signal without blowing up logs.
 
+Disabling instrumentation for common overflow patterns
+------------------------------------------------------
+
+There are certain overflow-dependent or overflow-prone code patterns which
+produce a lot of noise for integer overflow/truncation sanitizers. Negated
+unsigned constants, post-decrements in a while loop condition and simple
+overflow checks are accepted and pervasive code patterns. However, the signal
+received from sanitizers instrumenting these code patterns may be too noisy for
+some projects. To disable instrumentation for these common patterns one should
+use ``-fsanitize-overflow-pattern-exclusion=``.
+
+Currently, this option supports three overflow-dependent code idioms:
+
+``negated-unsigned-const``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=negated-unsigned-const
+    unsigned long foo = -1UL; // No longer causes a negation overflow warning
+    unsigned long bar = -2UL; // and so on...
+
+``post-decr-while``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=post-decr-while
+    unsigned char count = 16;
+    while (count--) { /* ... */ } // No longer causes unsigned-integer-overflow sanitizer to trip
+
+``add-overflow-test``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=add-overflow-test
+    if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings,
+                                            // won't be instrumented (same for signed types)
+
+You can enable all exclusions with
+``-fsanitize-overflow-pattern-exclusion=all`` or disable all exclusions with
+``-fsanitize-overflow-pattern-exclusion=none``. Specifying ``none`` takes
+precedence over all other values.
+
 Issue Suppression
 =================
 
diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 561a9d872acfb0..6d84bd03de810a 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -3206,6 +3206,10 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {
   /// Set the C++11 in-class initializer for this member.
   void setInClassInitializer(Expr *NewInit);
 
+  /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
+  /// \p nullptr if either the attribute or the field doesn't exist.
+  const FieldDecl *findCountedByField() const;
+
 private:
   void setLazyInClassInitializer(LazyDeclStmtPtr NewInit);
 
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 5b813bfc2faf90..f5863524723a2e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4043,6 +4043,15 @@ class BinaryOperator : public Expr {
   void setHasStoredFPFeatures(bool B) { BinaryOperatorBits.HasFPFeatures = B; }
   bool hasStoredFPFeatures() const { return BinaryOperatorBits.HasFPFeatures; }
 
+  /// Set/get the bit that tells arithmetic overflow sanitizers whether this
+  /// BinaryOperator should be excluded from their instrumentation.
+  void setExcludedOverflowPattern(bool B) {
+    BinaryOperatorBits.ExcludedOverflowPattern = B;
+  }
+  bool hasExcludedOverflowPattern() const {
+    return BinaryOperatorBits.ExcludedOverflowPattern;
+  }
+
   /// Get FPFeatures from trailing storage
   FPOptionsOverride getStoredFPFeatures() const {
     assert(hasStoredFPFeatures());
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index bbd7634bcc3bfb..f1a2aac0a8b2f8 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -650,6 +650,11 @@ class alignas(void *) Stmt {
     LLVM_PREFERRED_TYPE(bool)
     unsigned HasFPFeatures : 1;
 
+    /// Whether or not this BinaryOperator should be excluded from integer
+    /// overflow sanitization.
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned ExcludedOverflowPattern : 1;
+
     SourceLocation OpLoc;
   };
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c710c3360be7da..da2f939067bfab 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5428,7 +5428,8 @@ def err_template_spec_extra_headers : Error<
   "extraneous template parameter list in template specialization or "
   "out-of-line template definition">;
 def ext_template_spec_extra_headers : ExtWarn<
-  "extraneous template parameter list in template specialization">;
+  "extraneous template parameter list in template specialization">,
+   InGroup<DiagGroup<"extraneous-template-head">>, DefaultError;
 def note_explicit_template_spec_does_not_need_header : Note<
   "'template<>' header not required for explicitly-specialized class %0 "
   "declared here">;
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index d454a7ff2f8cf4..2e9f2c552aad8a 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -406,6 +406,8 @@ VALUE_LANGOPT(TrivialAutoVarInitMaxSize, 32, 0,
              "stop trivial automatic variable initialization if var size exceeds the specified size (in bytes). Must be greater than 0.")
 ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 2, SOB_Undefined,
              "signed integer overflow handling")
+LANGOPT(IgnoreNegationOverflow, 1, 0, "ignore overflow caused by negation")
+LANGOPT(SanitizeOverflowIdioms, 1, 1, "enable instrumentation for common overflow idioms")
 ENUM_LANGOPT(ThreadModel  , ThreadModelKind, 2, ThreadModelKind::POSIX, "Thread Model")
 
 BENIGN_LANGOPT(ArrowDepth, 32, 256,
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 91f1c2f2e6239e..eb4cb4b5a7e93f 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -367,6 +367,21 @@ class LangOptionsBase {
     PerThread,
   };
 
+  /// Bit flags naming code patterns that arithmetic overflow sanitizers
+  /// should not instrument. Each enumerator occupies its own bit.
+  enum OverflowPatternExclusionKind {
+    /// Don't exclude any overflow patterns from sanitizers.
+    None = 1 << 0,
+    /// Exclude all of the overflow patterns listed below.
+    All = 1 << 1,
+    /// Overflow test idiom: if (a + b < a)
+    AddOverflowTest = 1 << 2,
+    /// Negated unsigned constant: -1UL
+    NegUnsignedConst = 1 << 3,
+    /// Post-decrement in a while loop: while (count--)
+    PostDecrInWhile = 1 << 4,
+  };
+
   enum class DefaultVisiblityExportMapping {
     None,
     /// map only explicit default visibilities to exported
@@ -555,6 +570,11 @@ class LangOptions : public LangOptionsBase {
   /// The default stream kind used for HIP kernel launching.
   GPUDefaultStreamKind GPUDefaultStream;
 
+  /// Which overflow patterns should be excluded from sanitizer instrumentation
+  unsigned OverflowPatternExclusionMask = 0;
+
+  std::vector<std::string> OverflowPatternExclusionValues;
+
   /// The seed used by the randomize structure layout feature.
   std::string RandstructSeed;
 
@@ -630,6 +650,14 @@ class LangOptions : public LangOptionsBase {
     return MSCompatibilityVersion >= MajorVersion * 100000U;
   }
 
+  bool isOverflowPatternExcluded(OverflowPatternExclusionKind Kind) const {
+    if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::None)
+      return false; // None wins over every other bit, including All.
+    if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::All)
+      return true; // All excludes every pattern.
+    return OverflowPatternExclusionMask & Kind; // Otherwise test Kind's bit.
+  }
+
   /// Reset all of the options that are not considered when building a
   /// module.
   void resetNonModularOptions();
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 6df3a6a5943a97..acc1f2fde53979 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2565,6 +2565,11 @@ defm sanitize_stats : BoolOption<"f", "sanitize-stats",
           "Disable">,
   BothFlags<[], [ClangOption], " sanitizer statistics gathering.">>,
   Group<f_clang_Group>;
+def fsanitize_overflow_pattern_exclusion_EQ : CommaJoined<["-"], "fsanitize-overflow-pattern-exclusion=">,
+  HelpText<"Specify the overflow patterns to exclude from arithmetic sanitizer instrumentation">,
+  Visibility<[ClangOption, CC1Option]>,
+  Values<"none,all,add-overflow-test,negated-unsigned-const,post-decr-while">,
+  MarshallingInfoStringVector<LangOpts<"OverflowPatternExclusionValues">>;
 def fsanitize_thread_memory_access : Flag<["-"], "fsanitize-thread-memory-access">,
                                      Group<f_clang_Group>,
                                      HelpText<"Enable memory access instrumentation in ThreadSanitizer (default)">;
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
index 47ef175302679f..e64ec463ca8907 100644
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -33,6 +33,7 @@ class SanitizerArgs {
   std::vector<std::string> BinaryMetadataIgnorelistFiles;
   int CoverageFeatures = 0;
   int BinaryMetadataFeatures = 0;
+  int OverflowPatternExclusions = 0;
   int MsanTrackOrigins = 0;
   bool MsanUseAfterDtor = true;
   bool MsanParamRetval = true;
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index e125143bc1b270..90caf81757ac96 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4678,6 +4678,19 @@ void FieldDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const {
   DeclaratorDecl::printName(OS, Policy);
 }
 
+const FieldDecl *FieldDecl::findCountedByField() const {
+  const auto *CAT = getType()->getAs<CountAttributedType>();
+  if (!CAT) // This field's type is not a CountAttributedType.
+    return nullptr;
+  // cast<> (not dyn_cast<>): the count expression must be a DeclRefExpr.
+  const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
+  const auto *CountDecl = CountDRE->getDecl();
+  if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
+    CountDecl = IFD->getAnonField(); // Look through the IndirectFieldDecl.
+  // Yields nullptr when the referenced declaration is not a FieldDecl.
+  return dyn_cast<FieldDecl>(CountDecl);
+}
+
 //===----------------------------------------------------------------------===//
 // TagDecl Implementation
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 9d5b8167d0ee62..57475c66a94e35 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4759,6 +4759,53 @@ ParenListExpr *ParenListExpr::CreateEmpty(const ASTContext &Ctx,
   return new (Mem) ParenListExpr(EmptyShell(), NumExprs);
 }
 
+/// Certain overflow-dependent code patterns can have their integer overflow
+/// sanitization disabled. Check whether comparison \p E has the common form
+/// `a + b < a` (or `a > a + b`) and, if so, return the BinaryOperator doing
+/// the addition so codegen can elide its overflow check; else return empty.
+static std::optional<BinaryOperator *>
+getOverflowPatternBinOp(const BinaryOperator *E) {
+  Expr *Addition, *ComparedTo;
+  if (E->getOpcode() == BO_LT) {
+    Addition = E->getLHS();
+    ComparedTo = E->getRHS();
+  } else if (E->getOpcode() == BO_GT) {
+    Addition = E->getRHS();
+    ComparedTo = E->getLHS();
+  } else {
+    return {};
+  }
+
+  const Expr *AddLHS = nullptr, *AddRHS = nullptr;
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(Addition);
+
+  if (BO && BO->getOpcode() == clang::BO_Add) {
+    // Remember both addends so they can be matched against ComparedTo.
+    AddLHS = BO->getLHS();
+    AddRHS = BO->getRHS();
+  }
+
+  if (!AddLHS || !AddRHS)
+    return {};
+
+  const Decl *LHSDecl, *RHSDecl, *OtherDecl;
+
+  LHSDecl = AddLHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+  RHSDecl = AddRHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+  OtherDecl = ComparedTo->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+
+  if (!OtherDecl)
+    return {};
+
+  if (!LHSDecl && !RHSDecl)
+    return {};
+  // Match only when exactly one addend names the same decl as ComparedTo.
+  if ((LHSDecl && LHSDecl == OtherDecl && LHSDecl != RHSDecl) ||
+      (RHSDecl && RHSDecl == OtherDecl && RHSDecl != LHSDecl))
+    return BO;
+  return {};
+}
+
 BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs,
                                Opcode opc, QualType ResTy, ExprValueKind VK,
                                ExprObjectKind OK, SourceLocation opLoc,
@@ -4768,8 +4815,15 @@ BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs,
   assert(!isCompoundAssignmentOp() &&
          "Use CompoundAssignOperator for compound assignments");
   BinaryOperatorBits.OpLoc = opLoc;
+  BinaryOperatorBits.ExcludedOverflowPattern = 0;
   SubExprs[LHS] = lhs;
   SubExprs[RHS] = rhs;
+  if (Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::AddOverflowTest)) {
+    std::optional<BinaryOperator *> Result = getOverflowPatternBinOp(this);
+    if (Result.has_value())
+      Result.value()->BinaryOperatorBits.ExcludedOverflowPattern = 1;
+  }
   BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage();
   if (hasStoredFPFeatures())
     setStoredFPFeatures(FPFeatures);
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index ed8d1cf1b98dd8..db8000e25dc7cc 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -408,6 +408,8 @@ class MicrosoftCXXNameMangler {
   void mangleSourceName(StringRef Name);
   void mangleNestedName(GlobalDecl GD);
 
+  void mangleAutoReturnType(QualType T, QualifierMangleMode QMM);
+
 private:
   bool isStructorDecl(const NamedDecl *ND) const {
     return ND == Structor || getStructor(ND) == Structor;
@@ -477,6 +479,11 @@ class MicrosoftCXXNameMangler {
                           SourceRange Range);
   void mangleObjCKindOfType(const ObjCObjectType *T, Qualifiers Quals,
                             SourceRange Range);
+
+  void mangleAutoReturnType(const MemberPointerType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const PointerType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const LValueReferenceType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const RValueReferenceType *T, Qualifiers Quals);
 };
 }
 
@@ -2494,6 +2501,57 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
   mangleArtificialTagType(TagTypeKind::Struct, ASMangling, {"__clang"});
 }
 
+void MicrosoftCXXNameMangler::mangleAutoReturnType(QualType T,
+                                                   QualifierMangleMode QMM) {
+  assert(getASTContext().getLangOpts().isCompatibleWithMSVC(
+             LangOptions::MSVC2019) &&
+         "Cannot mangle MSVC 2017 auto return types!");
+  // A bare AutoType mangles directly: "_T" for decltype(auto), "_P" for auto.
+  if (isa<AutoType>(T)) {
+    const auto *AT = T->getContainedAutoType();
+    Qualifiers Quals = T.getLocalQualifiers();
+
+    if (QMM == QMM_Result)
+      Out << '?';
+    if (QMM != QMM_Drop)
+      mangleQualifiers(Quals, false);
+    Out << (AT->isDecltypeAuto() ? "_T" : "_P");
+    return;
+  }
+  // Otherwise desugar T; only pointer and reference types are handled below.
+  T = T.getDesugaredType(getASTContext());
+  Qualifiers Quals = T.getLocalQualifiers();
+  // Only QMM_Mangle emits T's local qualifiers here.
+  switch (QMM) {
+  case QMM_Drop:
+  case QMM_Result:
+    break;
+  case QMM_Mangle:
+    mangleQualifiers(Quals, false);
+    break;
+  default:
+    llvm_unreachable("QMM_Escape unexpected");
+  }
+  // Dispatch to the overload for T's type class; other classes are unreachable.
+  const Type *ty = T.getTypePtr();
+  switch (ty->getTypeClass()) {
+  case Type::MemberPointer:
+    mangleAutoReturnType(cast<MemberPointerType>(ty), Quals);
+    break;
+  case Type::Pointer:
+    mangleAutoReturnType(cast<PointerType>(ty), Quals);
+    break;
+  case Type::LValueReference:
+    mangleAutoReturnType(cast<LValueReferenceType>(ty), Quals);
+    break;
+  case Type::RValueReference:
+    mangleAutoReturnType(cast<RValueReferenceType>(ty), Quals);
+    break;
+  default:
+    llvm_unreachable("Invalid type expected");
+  }
+}
+
 void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range,
                                          QualifierMangleMode QMM) {
   // Don't use the canonical types.  MSVC includes things like 'const' on
@@ -2907,17 +2965,52 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
       // can differ by their calling convention and are typically deduced.  So
       // we make sure that this type gets mangled properly.
       mangleType(ResultType, Range, QMM_Result);
-    } else if (const auto *AT = dyn_cast_or_null<AutoType>(
-                   ResultType->getContainedAutoType())) {
-      Out << '?';
-      mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
-      Out << '?';
+    } else if (IsInLambda) {
+      if (const auto *AT = ResultType->getContainedAutoType()) {
+        assert(AT->getKeyword() == AutoTypeKeyword::Auto &&
+               "should only need to mangle auto!");
+        (void)AT;
+        Out << '?';
+        mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
+        Out << '?';
+        mangleSourceName("<auto>");
+        Out << '@';
+      } else {
+        Out << '@';
+      }
+    } else if (const auto *AT = ResultType->getContainedAutoType()) {
       assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType &&
              "shouldn't need to mangle __auto_type!");
-      mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
-      Out << '@';
-    } else if (IsInLambda) {
-      Out << '@';
+
+      // If we have any pointer types with the clang address space extension
+      // then defer to the custom clang mangling to keep backwards
+      // compatibility. See `mangleType(const PointerType *T, Qualifiers Quals,
+      // SourceRange Range)` for details.
+      auto UseClangMangling = [](QualType ResultType) {
+        QualType T = ResultType;
+        while (isa<PointerType>(T.getTypePtr())) {
+          T = T->getPointeeType();
+          if (T.getQualifiers().hasAddressSpace())
+            return true;
+        }
+        return false;
+      };
+
+      if (getASTContext().getLangOpts().isCompatibleWithMSVC(
+              LangOptions::MSVC2019) &&
+          !UseClangMangling(ResultType)) {
+        if (D && !D->getPrimaryTemplate()) {
+          Out << '@';
+        } else {
+          mangleAutoReturnType(ResultType, QMM_Result);
+        }
+      } else {
+        Out << '?';
+        mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
+        Out << '?';
+        mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
+        Out << '@';
+      }
     } else {
       if (ResultType->isVoidType())
         ResultType = ResultType.getUnqualifiedType();
@@ -4220,6 +4313,57 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
   Mangler.getStream() << '@';
 }
 
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const MemberPointerType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+  if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
+    Out << '8'; // Pointee is a function prototype (member function pointer).
+    mangleName(T->getClass()->castAs<RecordType>()->getDecl());
+    mangleFunctionType(FPT, nullptr, true);
+  } else {
+    mangleQualifiers(PointeeType.getQualifiers(), true);
+    mangleName(T->getClass()->castAs<RecordType>()->getDecl());
+    mangleAutoReturnType(PointeeType, QMM_Drop); // Qualifiers emitted above.
+  }
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const PointerType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!PointeeType.getQualifiers().hasAddressSpace() &&
+         "Unexpected address space mangling required");
+  // Emit the pointer's own qualifiers before mangling what it points to.
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+  // Function pointee: '6' + function type; otherwise recurse on the pointee.
+  if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
+    Out << '6';
+    mangleFunctionType(FPT);
+  } else {
+    mangleAutoReturnType(PointeeType, QMM_Mangle);
+  }
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const LValueReferenceType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
+  Out << 'A'; // Code emitted for an lvalue reference.
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleAutoReturnType(PointeeType, QMM_Mangle); // Recurse on the referent.
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const RValueReferenceType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
+  Out << "$$Q"; // Code emitted for an rvalue reference.
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleAutoReturnType(PointeeType, QMM_Mangle); // Recurse on the referent.
+}
+
 MicrosoftMangleContext *MicrosoftMangleContext::create(ASTContext &Context,
                                                        DiagnosticsEngine &Diags,
                                                        bool IsAux) {
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b5e5240e55be3f..1c0baeaee03632 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -987,7 +987,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
     // attribute.
     return nullptr;
 
-  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
+  const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
   if (!CountedByFD)
     // Can't find the field referenced by the "counted_by" attribute.
     return nullptr;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f93f8dda0bd29a..0672861790633b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1150,22 +1150,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
                                    getIntAlign(), "..counted_by.load");
 }
 
-const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) {
-  if (!FD)
-    return nullptr;
-
-  const auto *CAT = FD->getType()->getAs<CountAttributedType>();
-  if (!CAT)
-    return nullptr;
-
-  const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
-  const auto *CountDecl = CountDRE->getDecl();
-  if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
-    CountDecl = IFD->getAnonField();
-
-  return dyn_cast<FieldDecl>(CountDecl);
-}
-
 void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
                                       llvm::Value *Index, QualType IndexType,
                                       bool Accessed) {
@@ -4305,7 +4289,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
           ME->isFlexibleArrayMemberLike(getContext(), StrictFlexArraysLevel) &&
           ME->getMemberDecl()->getType()->isCountAttributedType()) {
         const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
-        if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) {
+        if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) {
           if (std::optional<int64_t> Diff =
                   getOffsetDifferenceInBits(*this, CountFD, FAMDecl)) {
             CharUnits OffsetDiff = CGM.getContext().toCharUnitsFromBits(*Diff);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 84392745ea6144..6eac2b4c54e1ba 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -24,6 +24,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/CodeGenOptions.h"
@@ -195,13 +196,24 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
   if (!Op.mayHaveIntegerOverflow())
     return true;
 
+  const UnaryOperator *UO = dyn_cast<UnaryOperator>(Op.E);
+
+  if (UO && UO->getOpcode() == UO_Minus &&
+      Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::NegUnsignedConst) &&
+      UO->isIntegerConstantExpr(Ctx))
+    return true;
+
   // If a unary op has a widened operand, the op cannot overflow.
-  if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
+  if (UO)
     return !UO->canOverflow();
 
   // We usually don't need overflow checks for binops with widened operands.
   // Multiplication with promoted unsigned operands is a special case.
   const auto *BO = cast<BinaryOperator>(Op.E);
+  if (BO->hasExcludedOverflowPattern())
+    return true;
+
   auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS());
   if (!OptionalLHSTy)
     return false;
@@ -2766,6 +2778,26 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
   llvm_unreachable("Unknown SignedOverflowBehaviorTy");
 }
 
+/// For the purposes of overflow pattern exclusion, does \p UO match the
+/// "while(i--)" pattern? \p isInc / \p isPre describe the inc/dec form of UO.
+static bool matchesPostDecrInWhile(const UnaryOperator *UO, bool isInc,
+                                   bool isPre, ASTContext &Ctx) {
+  if (isInc || isPre)
+    return false;
+  // Only a post-decrement gets this far; now require that the pattern is on:
+  // -fsanitize-overflow-pattern-exclusion=post-decr-while
+  if (!Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::PostDecrInWhile))
+    return false;
+  // All parents (usually just one) must be a WhileStmt. NOTE(review): this
+  // does not check that UO is the loop condition rather than the loop body.
+  for (const auto &Parent : Ctx.getParentMapContext().getParents(*UO))
+    if (!Parent.get<WhileStmt>())
+      return false;
+  // Also reached when getParents() reports no parents at all.
+  return true;
+}
+
 namespace {
 /// Handles check and update for lastprivate conditional variables.
 class OMPLastprivateConditionalUpdateRAII {
@@ -2877,6 +2909,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   } else if (type->isIntegerType()) {
     QualType promotedType;
     bool canPerformLossyDemotionCheck = false;
+
+    bool excludeOverflowPattern =
+        matchesPostDecrInWhile(E, isInc, isPre, CGF.getContext());
+
     if (CGF.getContext().isPromotableIntegerType(type)) {
       promotedType = CGF.getContext().getPromotedIntegerType(type);
       assert(promotedType != type && "Shouldn't promote to the same type.");
@@ -2936,7 +2972,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
     } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
       value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
     } else if (E->canOverflow() && type->isUnsignedIntegerType() &&
-               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
+               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+               !excludeOverflowPattern) {
       value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(
           E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts())));
     } else {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 19a7feeb69d820..57e0b7f91e9bf8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3305,10 +3305,6 @@ class CodeGenFunction : public CodeGenTypeCache {
                                         const FieldDecl *FAMDecl,
                                         uint64_t &Offset);
 
-  /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
-  /// \p nullptr if either the attribute or the field doesn't exist.
-  const FieldDecl *FindCountedByField(const FieldDecl *FD);
-
   /// Build an expression accessing the "counted_by" field.
   llvm::Value *EmitLoadOfCountedByField(const Expr *Base,
                                         const FieldDecl *FAMDecl,
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index e12416e51f8d24..5b95019c25cab6 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2271,8 +2271,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
     return false;
   }
 
-  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
-    ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
+  auto initializeTargets = [&]() {
     const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
     // The 'Darwin' toolchain is initialized only when its arguments are
     // computed. Get the default arguments for OFK_None to ensure that
@@ -2282,6 +2281,12 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
     // FIXME: For some more esoteric targets the default toolchain is not the
     //        correct one.
     C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_None);
+    return Triple;
+  };
+
+  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
+    ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
+    const llvm::Triple Triple = initializeTargets();
     RegisterEffectiveTriple TripleRAII(TC, Triple);
     switch (RLT) {
     case ToolChain::RLT_CompilerRT:
@@ -2325,7 +2330,9 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
   }
 
   if (C.getArgs().hasArg(options::OPT_print_target_triple)) {
-    llvm::outs() << TC.getTripleString() << "\n";
+    initializeTargets();
+    llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
+    llvm::outs() << Triple.getTriple() << "\n";
     return false;
   }
 
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 1fd870b72286e5..a63ee944fd1bb4 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -119,6 +119,10 @@ static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
 static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
                                  bool DiagnoseErrors);
 
+static int parseOverflowPatternExclusionValues(const Driver &D,
+                                               const llvm::opt::Arg *A,
+                                               bool DiagnoseErrors);
+
 /// Parse -f(no-)?sanitize-metadata= flag values, diagnosing any invalid
 /// components. Returns OR of members of \c BinaryMetadataFeature enumeration.
 static int parseBinaryMetadataFeatures(const Driver &D, const llvm::opt::Arg *A,
@@ -788,6 +792,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
           << "fsanitize-trap=cfi";
   }
 
+  for (const auto *Arg :
+       Args.filtered(options::OPT_fsanitize_overflow_pattern_exclusion_EQ)) {
+    Arg->claim();
+    OverflowPatternExclusions |=
+        parseOverflowPatternExclusionValues(D, Arg, DiagnoseErrors);
+  }
+
   // Parse -f(no-)?sanitize-coverage flags if coverage is supported by the
   // enabled sanitizers.
   for (const auto *Arg : Args) {
@@ -1241,6 +1252,10 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
   addSpecialCaseListOpt(Args, CmdArgs,
                         "-fsanitize-system-ignorelist=", SystemIgnorelistFiles);
 
+  if (OverflowPatternExclusions)
+    Args.AddAllArgs(CmdArgs,
+                    options::OPT_fsanitize_overflow_pattern_exclusion_EQ);
+
   if (MsanTrackOrigins)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" +
                                          Twine(MsanTrackOrigins)));
@@ -1426,6 +1441,37 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
   return Kinds;
 }
 
+/// Parse the values of one -fsanitize-overflow-pattern-exclusion= argument.
+///
+/// Every value of \p A is mapped to a LangOptionsBase overflow-pattern
+/// exclusion constant; a value that does not map to a non-zero constant is
+/// reported as an unsupported option argument, but only when \p DiagnoseErrors
+/// is set so that callers which pass false stay silent.
+///
+/// \returns the bitwise OR of the constants for all recognized values.
+static int parseOverflowPatternExclusionValues(const Driver &D,
+                                               const llvm::opt::Arg *A,
+                                               bool DiagnoseErrors) {
+  int Exclusions = 0;
+  for (int i = 0, n = A->getNumValues(); i != n; ++i) {
+    const char *Value = A->getValue(i);
+    int E =
+        llvm::StringSwitch<int>(Value)
+            .Case("none", LangOptionsBase::None)
+            .Case("all", LangOptionsBase::All)
+            .Case("add-overflow-test", LangOptionsBase::AddOverflowTest)
+            .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst)
+            .Case("post-decr-while", LangOptionsBase::PostDecrInWhile)
+            .Default(0);
+    // Honor DiagnoseErrors: the flag is otherwise unused in this function.
+    if (E == 0 && DiagnoseErrors)
+      D.Diag(clang::diag::err_drv_unsupported_option_argument)
+          << A->getSpelling() << Value;
+    Exclusions |= E;
+  }
+  return Exclusions;
+}
+
 int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
                           bool DiagnoseErrors) {
   assert(A->getOption().matches(options::OPT_fsanitize_coverage) ||
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 96aa930ea28612..f2bc11839edd4d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7769,6 +7769,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
   }
 
+  Args.AddAllArgs(CmdArgs,
+                  options::OPT_fsanitize_overflow_pattern_exclusion_EQ);
+
   Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block,
                   options::OPT_fno_offload_uniform_block);
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index e3911c281985b7..5a5f5cb79a12f2 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4267,6 +4267,19 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
   }
 
+  if (auto *A = Args.getLastArg(OPT_fsanitize_overflow_pattern_exclusion_EQ)) {
+    for (int i = 0, n = A->getNumValues(); i != n; ++i) {
+      Opts.OverflowPatternExclusionMask |=
+          llvm::StringSwitch<unsigned>(A->getValue(i))
+              .Case("none", LangOptionsBase::None)
+              .Case("all", LangOptionsBase::All)
+              .Case("add-overflow-test", LangOptionsBase::AddOverflowTest)
+              .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst)
+              .Case("post-decr-while", LangOptionsBase::PostDecrInWhile)
+              .Default(0);
+    }
+  }
+
   // Parse -fsanitize= arguments.
   parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
                       Diags, Opts.Sanitize);
diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 04019165068668..44c6e9a4e48d1b 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -12,6 +12,10 @@
 #error "This file is for CUDA compilation only."
 #endif
 
+// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
+// libcalls reach the link step instead of being eagerly replaced.
+#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
+
 #ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
@@ -345,4 +349,5 @@ __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")
 
+#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
 #endif // __CLANG_CUDA_MATH_H__
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 11e1e7d032586f..8468751d9de260 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -13,6 +13,10 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
+// libcalls reach the link step instead of being eagerly replaced.
+#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
+
 #if !defined(__HIPCC_RTC__)
 #include <limits.h>
 #include <stdint.h>
@@ -1321,4 +1325,5 @@ __host__ inline static int max(int __arg1, int __arg2) {
 #pragma pop_macro("__RETURN_TYPE")
 #pragma pop_macro("__FAST_OR_SLOW")
 
+#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
 #endif // __CLANG_HIP_MATH_H__
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a33f2a41a65497..8ae07907a04aba 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1128,6 +1128,7 @@ void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) {
       (BinaryOperator::Opcode)CurrentUnpackingBits->getNextBits(/*Width=*/6));
   bool hasFP_Features = CurrentUnpackingBits->getNextBit();
   E->setHasStoredFPFeatures(hasFP_Features);
+  E->setExcludedOverflowPattern(CurrentUnpackingBits->getNextBit());
   E->setLHS(Record.readSubExpr());
   E->setRHS(Record.readSubExpr());
   E->setOperatorLoc(readSourceLocation());
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 038616a675b727..c292d0a789c7cd 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1063,6 +1063,7 @@ void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) {
   CurrentPackingBits.addBits(E->getOpcode(), /*Width=*/6);
   bool HasFPFeatures = E->hasStoredFPFeatures();
   CurrentPackingBits.addBit(HasFPFeatures);
+  CurrentPackingBits.addBit(E->hasExcludedOverflowPattern());
   Record.AddStmt(E->getLHS());
   Record.AddStmt(E->getRHS());
   Record.AddSourceLocation(E->getOperatorLoc());
diff --git a/clang/test/CodeGen/X86/cmpccxadd-builtins.c b/clang/test/CodeGen/X86/cmpccxadd-builtins.c
index 6daed3a1b17b67..f058dc9b2baa46 100644
--- a/clang/test/CodeGen/X86/cmpccxadd-builtins.c
+++ b/clang/test/CodeGen/X86/cmpccxadd-builtins.c
@@ -52,50 +52,50 @@ long long test_cmplxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NB);
 }
 
-int test_cmpnbexadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnbexadd32(
+int test_cmpaxadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpaxadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 4)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_Z);
 }
 
-long long test_cmpnbexadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnbexadd64(
+long long test_cmpaxadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpaxadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 4)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_Z);
 }
 
-int test_cmpnbxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnbxadd32(
+int test_cmpaexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpaexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 5)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NZ);
 }
 
-long long test_cmpnbxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnbxadd64(
+long long test_cmpaexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpaexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 5)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NZ);
 }
 
-int test_cmpnlexadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnlexadd32(
+int test_cmpgxadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpgxadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 6)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_BE);
 }
 
-long long test_cmpnlexadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnlexadd64(
+long long test_cmpgxadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpgxadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 6)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_BE);
 }
 
-int test_cmpnlxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnlxadd32(
+int test_cmpgexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpgexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 7)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NBE);
 }
 
-long long test_cmpnlxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnlxadd64(
+long long test_cmpgexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpgexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 7)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NBE);
 }
@@ -136,14 +136,14 @@ long long test_cmpnsxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_P);
 }
 
-int test_cmpnzxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnzxadd32(
+int test_cmpnexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpnexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 11)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NP);
 }
 
-long long test_cmpnzxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnzxadd64(
+long long test_cmpnexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpnexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 11)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NP);
 }
@@ -184,14 +184,14 @@ long long test_cmpsxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_LE);
 }
 
-int test_cmpzxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpzxadd32(
+int test_cmpexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 15)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NLE);
 }
 
-long long test_cmpzxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpzxadd64(
+long long test_cmpexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 15)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NLE);
 }
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
new file mode 100644
index 00000000000000..511a88cc7a2836
--- /dev/null
+++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s
+
+// Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns
+extern unsigned a, b, c;
+extern int u, v, w;
+
+extern unsigned some(void);
+
+// Make sure all these still have handler paths; we shouldn't be excluding
+// instrumentation of any "near" patterns.
+// CHECK-LABEL: close_but_not_quite
+void close_but_not_quite(void) {
+  // CHECK: br i1{{.*}}handler.
+  if (a + b > a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a - b < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b + 1 < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  // CHECK: br i1{{.*}}handler.
+  if (a + b < a + 1)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (b >= a + b)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + a < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b == a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler
+  // Although this can never actually overflow we are still checking that the
+  // sanitizer instruments it.
+  while (--a)
+    some();
+}
+
+// cvise'd kernel code that caused problems during development
+typedef unsigned _size_t;
+typedef enum { FSE_repeat_none } FSE_repeat;
+typedef enum { ZSTD_defaultAllowed } ZSTD_defaultPolicy_e;
+FSE_repeat ZSTD_selectEncodingType_repeatMode;
+ZSTD_defaultPolicy_e ZSTD_selectEncodingType_isDefaultAllowed;
+_size_t ZSTD_NCountCost(void);
+
+// CHECK-LABEL: ZSTD_selectEncodingType
+// CHECK: br i1{{.*}}handler
+void ZSTD_selectEncodingType(void) {
+  _size_t basicCost =
+             ZSTD_selectEncodingType_isDefaultAllowed ? ZSTD_NCountCost() : 0,
+         compressedCost = 3 + ZSTD_NCountCost();
+  if (basicCost <= compressedCost)
+    ZSTD_selectEncodingType_repeatMode = FSE_repeat_none;
+}
+
+// CHECK-LABEL: function_calls
+void function_calls(void) {
+  // CHECK: br i1{{.*}}handler
+  if (some() + b < some())
+    c = 9;
+}
+
+// CHECK-LABEL: not_quite_a_negated_unsigned_const
+void not_quite_a_negated_unsigned_const(void) {
+  // CHECK: br i1{{.*}}handler
+  a = -b;
+}
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp
new file mode 100644
index 00000000000000..737c9c407f4703
--- /dev/null
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp
@@ -0,0 +1,369 @@
+// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck %s
+
+struct StructA {};
+
+template<class T>
+auto AutoT() { return T(); }
+
+template<class T>
+const auto AutoConstT() { return T(); }
+
+template<class T>
+volatile auto AutoVolatileT() { return T(); }
+
+template<class T>
+const volatile auto AutoConstVolatileT() { return T(); }
+
+// The qualifiers of the return type should always be emitted even for void types.
+// Void types usually have their qualifiers stripped in the mangled name for the MSVC ABI.
+void test_template_auto_void() {
+  AutoT<void>();
+  // CHECK: call {{.*}} @"??$AutoT@X@@YA?A_PXZ"
+
+  AutoT<const void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBX@@YA?A_PXZ"
+
+  AutoT<volatile void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CCX@@YA?A_PXZ"
+
+  AutoT<const volatile void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CDX@@YA?A_PXZ"
+
+  AutoConstT<void>();
+  // CHECK: call {{.*}} @"??$AutoConstT@X@@YA?B_PXZ"
+
+  AutoVolatileT<void>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@X@@YA?C_PXZ"
+
+  AutoConstVolatileT<void>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@X@@YA?D_PXZ"
+}
+
+void test_template_auto_int() {
+  AutoT<int>();
+  // CHECK: call {{.*}} @"??$AutoT@H@@YA?A_PXZ"
+
+  AutoT<const int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBH@@YA?A_PXZ"
+
+  AutoT<volatile int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CCH@@YA?A_PXZ"
+
+  AutoT<const volatile int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CDH@@YA?A_PXZ"
+
+  AutoConstT<int>();
+  // CHECK: call {{.*}} @"??$AutoConstT@H@@YA?B_PXZ"
+
+  AutoVolatileT<int>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@H@@YA?C_PXZ"
+
+  AutoConstVolatileT<int>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@H@@YA?D_PXZ"
+}
+
+void test_template_auto_struct() {
+  AutoT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoT@UStructA@@@@YA?A_PXZ"
+
+  AutoT<const StructA>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBUStructA@@@@YA?A_PXZ"
+
+  AutoConstT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoConstT@UStructA@@@@YA?B_PXZ"
+
+  AutoVolatileT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@UStructA@@@@YA?C_PXZ"
+
+  AutoConstVolatileT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@UStructA@@@@YA?D_PXZ"
+}
+
+void test_template_auto_ptr() {
+  AutoT<int*>();
+  // CHECK: call {{.*}} @"??$AutoT@PEAH@@YA?A_PXZ"
+
+  AutoT<const int*>();
+  // CHECK: call {{.*}} @"??$AutoT@PEBH@@YA?A_PXZ"
+
+  AutoT<const int* const>();
+  // CHECK: call {{.*}} @"??$AutoT@QEBH@@YA?A_PXZ"
+
+  AutoConstT<int*>();
+  // CHECK: call {{.*}} @"??$AutoConstT@PEAH@@YA?B_PXZ"
+
+  AutoVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@PEAH@@YA?C_PXZ"
+
+  AutoConstVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@PEAH@@YA?D_PXZ"
+}
+
+template<class T>
+auto* PtrAutoT() { return T(); }
+
+template<class T>
+const auto* PtrAutoConstT() { return T(); }
+
+template<class T>
+volatile auto* PtrAutoVolatileT() { return T(); }
+
+template<class T>
+const volatile auto* PtrAutoConstVolatileT() { return T(); }
+
+void test_template_ptr_auto() {
+  PtrAutoT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@PEAH@@YAPEA_PXZ"
+
+  PtrAutoT<const int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@PEBH@@YAPEA_PXZ"
+
+  PtrAutoT<const int* const>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@QEBH@@YAPEA_PXZ"
+
+  PtrAutoConstT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoConstT@PEAH@@YAPEB_PXZ"
+
+  PtrAutoVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoVolatileT@PEAH@@YAPEC_PXZ"
+
+  PtrAutoConstVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoConstVolatileT@PEAH@@YAPED_PXZ"
+}
+
+int func_int();
+const int func_constint();
+void func_void();
+int* func_intptr();
+
+template<class T, T v>
+auto (*FuncPtrAutoT())() { return v; }
+
+void test_template_func_ptr_auto() {
+  FuncPtrAutoT<int (*)(), &func_int>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AHXZ$1?func_int@@YAHXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<const int (*)(), &func_constint>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6A?BHXZ$1?func_constint@@YA?BHXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<void (*)(), &func_void>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AXXZ$1?func_void@@YAXXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<int * (*)(), &func_intptr>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6APEAHXZ$1?func_intptr@@YAPEAHXZ@@YAP6A?A_PXZXZ"
+}
+
+template<class T>
+auto& RefAutoT(T& x) { return x; }
+
+template<class T>
+const auto& ConstRefAutoT(T& x) { return x; }
+
+template<class T>
+auto&& RRefAutoT(T& x) { return static_cast<int&&>(x); }
+
+void test_template_ref_auto() {
+  int x;
+
+  RefAutoT(x);
+  // CHECK: call {{.*}} @"??$RefAutoT@H@@YAAEA_PAEAH@Z"
+
+  ConstRefAutoT(x);
+  // CHECK: call {{.*}} @"??$ConstRefAutoT@H@@YAAEB_PAEAH@Z"
+
+  RRefAutoT(x);
+  // CHECK: call {{.*}} @"??$RRefAutoT@H@@YA$$QEA_PAEAH@Z"
+}
+
+template<class T>
+decltype(auto) DecltypeAutoT() { return T(); }
+
+template<class T>
+decltype(auto) DecltypeAutoT2(T& x) { return static_cast<T&&>(x); }
+
+void test_template_decltypeauto() {
+  DecltypeAutoT<void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@X@@YA?A_TXZ"
+
+  DecltypeAutoT<const void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBX@@YA?A_TXZ"
+
+  DecltypeAutoT<volatile void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCX@@YA?A_TXZ"
+
+  DecltypeAutoT<const volatile void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDX@@YA?A_TXZ"
+
+  DecltypeAutoT<int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@H@@YA?A_TXZ"
+
+  DecltypeAutoT<const int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBH@@YA?A_TXZ"
+
+  DecltypeAutoT<volatile int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCH@@YA?A_TXZ"
+
+  DecltypeAutoT<const volatile int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDH@@YA?A_TXZ"
+
+  int x;
+
+  DecltypeAutoT2(x);
+  // CHECK: call {{.*}} @"??$DecltypeAutoT2@H@@YA?A_TAEAH@Z"
+}
+
+// Still want to use clang's custom mangling for lambdas to keep backwards compatibility until
+// MSVC lambda name mangling has been deciphered.
+void test_lambda() {
+  auto lambdaIntRetAuto = []() { return 0; };
+  lambdaIntRetAuto();
+  // CHECK: call {{.*}} @"??R<lambda_1>@?0??test_lambda@@YAXXZ@QEBA?A?<auto>@@XZ"
+
+  auto lambdaIntRet = []() -> int { return 0; };
+  lambdaIntRet();
+  // CHECK: call {{.*}} @"??R<lambda_2>@?0??test_lambda@@YAXXZ@QEBA@XZ"
+
+  auto lambdaGenericIntIntRetAuto = [](auto a) { return a; };
+  lambdaGenericIntIntRetAuto(0);
+  // CHECK: call {{.*}} @"??$?RH@<lambda_0>@?0??test_lambda@@YAXXZ@QEBA?A?<auto>@@H@Z"
+}
+
+auto TestTrailingInt() -> int {
+  return 0;
+}
+
+auto TestTrailingConstVolatileVoid() -> const volatile void {
+}
+
+auto TestTrailingStructA() -> StructA {
+  return StructA{};
+}
+
+void test_trailing_return() {
+  TestTrailingInt();
+  // CHECK: call {{.*}} @"?TestTrailingInt@@YAHXZ"
+
+  TestTrailingConstVolatileVoid();
+  // CHECK: call {{.*}} @"?TestTrailingConstVolatileVoid@@YAXXZ"
+
+  TestTrailingStructA();
+  // CHECK: call {{.*}} @"?TestTrailingStructA@@YA?AUStructA@@XZ"
+}
+
+auto TestNonTemplateAutoInt() {
+  return 0;
+}
+
+auto TestNonTemplateAutoVoid() {
+  return;
+}
+
+auto TestNonTemplateAutoStructA() {
+  return StructA{};
+}
+
+const auto TestNonTemplateConstAutoInt() {
+  return 0;
+}
+
+const auto TestNonTemplateConstAutoVoid() {
+  return;
+}
+
+const auto TestNonTemplateConstAutoStructA() {
+  return StructA{};
+}
+
+void test_nontemplate_auto() {
+  TestNonTemplateAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoInt@@YA@XZ"
+
+  TestNonTemplateAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoVoid@@YA@XZ"
+
+  TestNonTemplateAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoStructA@@YA@XZ"
+
+  TestNonTemplateConstAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoInt@@YA@XZ"
+
+  TestNonTemplateConstAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoVoid@@YA@XZ"
+
+  TestNonTemplateConstAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoStructA@@YA@XZ"
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoInt() {
+    return 0;
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoVoid() {
+    return;
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoStructA() {
+    return StructA{};
+}
+
+void test_nontemplate_decltypeauto() {
+  TestNonTemplateDecltypeAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoInt@@YA@XZ"
+
+  TestNonTemplateDecltypeAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoVoid@@YA@XZ"
+
+  TestNonTemplateDecltypeAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoStructA@@YA@XZ"
+}
+
+struct StructB {
+  int x;
+};
+
+template<class T>
+auto StructB::* AutoMemberDataPtrT(T x) { return x; }
+
+template<class T>
+const auto StructB::* AutoConstMemberDataPtrT(T x) { return x; }
+
+void test_template_auto_member_data_ptr() {
+  AutoMemberDataPtrT(&StructB::x);
+  // CHECK: call {{.*}} @"??$AutoMemberDataPtrT@PEQStructB@@H@@YAPEQStructB@@_PPEQ0@H@Z"
+
+  AutoConstMemberDataPtrT(&StructB::x);
+  // CHECK: call {{.*}} @"??$AutoConstMemberDataPtrT@PEQStructB@@H@@YAPERStructB@@_PPEQ0@H@Z"
+}
+
+struct StructC {
+  void test() {}
+};
+
+struct StructD {
+  const int test() { return 0; }
+};
+
+template<class T>
+auto (StructC::*AutoMemberFuncPtrT(T x))() { return x; }
+
+template<class T>
+const auto (StructD::*AutoConstMemberFuncPtrT(T x))() { return x; }
+
+void test_template_auto_member_func_ptr() {
+  AutoMemberFuncPtrT(&StructC::test);
+  // CHECK: call {{.*}} @"??$AutoMemberFuncPtrT@P8StructC@@EAAXXZ@@YAP8StructC@@EAA?A_PXZP80@EAAXXZ@Z"
+
+  AutoConstMemberFuncPtrT(&StructD::test);
+  // CHECK: call {{.*}} @"??$AutoConstMemberFuncPtrT@P8StructD@@EAA?BHXZ@@YAP8StructD@@EAA?B_PXZP80@EAA?BHXZ@Z"
+}
+
+template<class T>
+auto * __attribute__((address_space(1))) * AutoPtrAddressSpaceT() {
+  T * __attribute__((address_space(1))) * p = nullptr;
+  return p;
+}
+
+void test_template_auto_address_space_ptr() {
+  AutoPtrAddressSpaceT<int>();
+  // CHECK: call {{.*}} @"??$AutoPtrAddressSpaceT@H@@YA?A?<auto>@@XZ"
+}
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
index 360ebdecc5562b..b7bc3953f0b438 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
@@ -34,15 +34,15 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$I?f@V@@QEAAXXZA@A@@@QEAA@XZ"
 
   AutoFunc<&S::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?f@S@@QEAAXXZ@@YA?A?<auto>@@XZ"
 
   AutoFunc<&M::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$H?f@M@@QEAAXXZA@@@YA?A?<auto>@@XZ"
 
   AutoFunc<&V::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$I?f@V@@QEAAXXZA@A@@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&S::a> auto_data_single_inheritance;
@@ -58,14 +58,14 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$FBA@A@@@QEAA@XZ"
 
   AutoFunc<&S::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$07@@YA?A?<auto>@@XZ"
 
   AutoFunc<&M::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$0M@@@YA?A?<auto>@@XZ"
 
   AutoFunc<&V::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$FBA@A@@@YA?A?<auto>@@XZ"
 }
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
index 8f98c1e59f73d7..251d9219c01ce2 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
@@ -19,6 +19,6 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ"
 
   AutoFunc<nullptr>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$0A@@@YA?A?<auto>@@XZ"
 }
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
index ff5395cea75eb7..effcc31ee31103 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
@@ -26,7 +26,7 @@ int j;
 
 void template_mangling() {
   AutoFunc<1>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?<auto>@@XZ"
   AutoParmTemplate<0> auto_int;
   // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ"
@@ -52,7 +52,7 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
 
   AutoFunc<&i>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?i@@3HA@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&i> auto_int_ptr;
@@ -64,7 +64,7 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$1?i@@3HA$1?j@@3HA@@QEAA@XZ"
 
   AutoFunc<&Func>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?Func@@YAHXZ@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&Func> auto_func_ptr;
diff --git a/clang/test/Driver/darwin-print-target-triple.c b/clang/test/Driver/darwin-print-target-triple.c
new file mode 100644
index 00000000000000..4f5fdfe9d0db34
--- /dev/null
+++ b/clang/test/Driver/darwin-print-target-triple.c
@@ -0,0 +1,42 @@
+// Test the output of -print-target-triple on Darwin.
+// See https://github.com/llvm/llvm-project/issues/61762
+
+//
+// All platforms
+//
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=x86_64-apple-macos -mmacos-version-min=15 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-MACOS %s
+// CHECK-CLANGRT-MACOS: x86_64-apple-macosx15.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-ios -mios-version-min=9 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-IOS %s
+// CHECK-CLANGRT-IOS: arm64-apple-ios9.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-watchos -mwatchos-version-min=3 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS %s
+// CHECK-CLANGRT-WATCHOS: arm64-apple-watchos3.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=armv7k-apple-watchos -mwatchos-version-min=3 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS-ARMV7K %s
+// CHECK-CLANGRT-WATCHOS-ARMV7K: thumbv7-apple-watchos3.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-tvos -mtvos-version-min=1 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-TVOS %s
+// CHECK-CLANGRT-TVOS: arm64-apple-tvos1.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-driverkit \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-DRIVERKIT %s
+// CHECK-CLANGRT-DRIVERKIT: arm64-apple-driverkit19.0.0
diff --git a/clang/test/Headers/gpu_disabled_math.cpp b/clang/test/Headers/gpu_disabled_math.cpp
new file mode 100644
index 00000000000000..6e697f52120aeb
--- /dev/null
+++ b/clang/test/Headers/gpu_disabled_math.cpp
@@ -0,0 +1,41 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h      \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=AMDGPU %s
+
+// RUN: %clang_cc1 -include __clang_cuda_runtime_wrapper.h      \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu sm_90 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=NVPTX %s
+
+extern "C" double sin(double x);
+
+// AMDGPU-LABEL: define dso_local noundef double @_Z3food(
+// AMDGPU-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGPU-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// AMDGPU-NEXT:    store double [[X]], ptr [[X_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]])
+// AMDGPU-NEXT:    ret double [[TMP1]]
+//
+// NVPTX-LABEL: define dso_local noundef double @_Z3food(
+// NVPTX-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[X_ADDR:%.*]] = alloca double, align 8
+// NVPTX-NEXT:    store double [[X]], ptr [[X_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load double, ptr [[X_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]])
+// NVPTX-NEXT:    ret double [[TMP1]]
+//
+double foo(double x) {
+  return sin(x);
+}
diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c
index cdbe1e95cba965..35543e6a49ffda 100644
--- a/clang/test/Misc/warning-flags.c
+++ b/clang/test/Misc/warning-flags.c
@@ -18,14 +18,13 @@ This test serves two purposes:
 
 The list of warnings below should NEVER grow.  It should gradually shrink to 0.
 
-CHECK: Warnings without flags (65):
+CHECK: Warnings without flags (64):
 
 CHECK-NEXT:   ext_expected_semi_decl_list
 CHECK-NEXT:   ext_missing_whitespace_after_macro_name
 CHECK-NEXT:   ext_new_paren_array_nonconst
 CHECK-NEXT:   ext_plain_complex
 CHECK-NEXT:   ext_template_arg_extra_parens
-CHECK-NEXT:   ext_template_spec_extra_headers
 CHECK-NEXT:   ext_typecheck_cond_incompatible_operands
 CHECK-NEXT:   ext_typecheck_ordered_comparison_of_pointer_integer
 CHECK-NEXT:   ext_using_undefined_std
diff --git a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
index c72c8b3c222b6b..fc48c9b2589f7e 100644
--- a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
 
 void test_too_few_arg()
 {
diff --git a/clang/test/SemaTemplate/temp_explicit.cpp b/clang/test/SemaTemplate/temp_explicit.cpp
index 0bb0cfad61fdb0..4612e4a57e90e0 100644
--- a/clang/test/SemaTemplate/temp_explicit.cpp
+++ b/clang/test/SemaTemplate/temp_explicit.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat %s
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++20 %s
 //
 // Tests explicit instantiation of templates.
 template<typename T, typename U = T> class X0 { };
@@ -128,11 +129,15 @@ struct Foo<int> // expected-note{{header not required for explicitly-specialized
     {};
 };
 
-template <> // expected-warning{{extraneous template parameter list}}
+template <> // expected-error{{extraneous template parameter list}}
 template <>
 struct Foo<int>::Bar<void>
 {};
 
+#if __cplusplus >= 202002L
+template<> void f(auto); // expected-error{{extraneous template parameter list}}
+#endif
+
 namespace N1 {
 
   template<typename T> struct X7 { }; // expected-note{{here}}
diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 9e66f77217ec6b..e041861edaf0b7 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -576,15 +576,8 @@ struct Allocator {
     }
 
     AsanThread *t = GetCurrentThread();
-    void *allocated;
-    if (t) {
-      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-      allocated = allocator.Allocate(cache, needed_size, 8);
-    } else {
-      SpinMutexLock l(&fallback_mutex);
-      AllocatorCache *cache = &fallback_allocator_cache;
-      allocated = allocator.Allocate(cache, needed_size, 8);
-    }
+    void *allocated = allocator.Allocate(
+        GetAllocatorCache(&t->malloc_storage()), needed_size, 8);
     if (UNLIKELY(!allocated)) {
       SetAllocatorOutOfMemory();
       if (AllocatorMayReturnNull())
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index d7d4967c949859..f478b9979f2daa 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -199,15 +199,8 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
     ReportRssLimitExceeded(stack);
   }
   MsanThread *t = GetCurrentThread();
-  void *allocated;
-  if (t) {
-    AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-    allocated = allocator.Allocate(cache, size, alignment);
-  } else {
-    SpinMutexLock l(&fallback_mutex);
-    AllocatorCache *cache = &fallback_allocator_cache;
-    allocated = allocator.Allocate(cache, size, alignment);
-  }
+  void *allocated = allocator.Allocate(GetAllocatorCache(&t->malloc_storage()),
+                                       size, alignment);
   if (UNLIKELY(!allocated)) {
     SetAllocatorOutOfMemory();
     if (AllocatorMayReturnNull())
diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
index f33aa2004db79e..d0be7f4fa87899 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
@@ -1,5 +1,7 @@
 // RUN: %clangxx %s -o %t && %run %t %p
 
+// UNSUPPORTED: android
+
 #include <assert.h>
 #include <errno.h>
 #include <stdint.h>
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index f0bfc1548e6458..4527c9f18fa054 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -13,11 +13,10 @@
 #include "flang/Runtime/entry-names.h"
 
 #define CUDA_REPORT_IF_ERROR(expr) \
-  [](CUresult result) { \
-    if (!result) \
+  [](cudaError_t err) { \
+    if (err == cudaSuccess) \
       return; \
-    const char *name = nullptr; \
-    cuGetErrorName(result, &name); \
+    const char *name = cudaGetErrorName(err); \
     if (!name) \
       name = "<unknown>"; \
     Terminator terminator{__FILE__, __LINE__}; \
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 95c55805dcabb3..64b581e8910d07 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1371,6 +1371,9 @@ genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
               llvm::ArrayRef<const semantics::Symbol *> wrapperSyms,
               llvm::ArrayRef<mlir::BlockArgument> wrapperArgs,
               llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+  assert(wrapperSyms.size() == wrapperArgs.size() &&
+         "Number of symbols and wrapper block arguments must match");
+
   auto ivCallback = [&](mlir::Operation *op) {
     genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs);
     return llvm::SmallVector<const semantics::Symbol *>(iv);
@@ -2083,8 +2086,6 @@ static void genCompositeDistributeSimd(
       llvm::concat<mlir::BlockArgument>(distributeOp.getRegion().getArguments(),
                                         simdOp.getRegion().getArguments()));
 
-  assert(wrapperArgs.empty() &&
-         "Block args for omp.simd and omp.distribute currently not expected");
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, /*wrapperSyms=*/{}, wrapperArgs,
                 llvm::omp::Directive::OMPD_distribute_simd, dsp);
@@ -2132,8 +2133,6 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
   auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>(
       wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments()));
 
-  assert(wsloopReductionSyms.size() == wrapperArgs.size() &&
-         "Number of symbols and wrapper block arguments must match");
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs,
                 llvm::omp::Directive::OMPD_do_simd, dsp);
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 88243536139e46..c9a20ebcc82e07 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -7,14 +7,20 @@
 #===------------------------------------------------------------------------===#
 
 include_directories(${CUDAToolkit_INCLUDE_DIRS})
-find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
 
 add_flang_library(CufRuntime
   allocator.cpp
   descriptor.cpp
 )
+
+if (BUILD_SHARED_LIBS)
+  set(CUDA_RT_TARGET CUDA::cudart)
+else()
+  set(CUDA_RT_TARGET CUDA::cudart_static)
+endif()
+
 target_link_libraries(CufRuntime
   PRIVATE
   FortranRuntime
-  ${CUDA_RUNTIME_LIBRARY}
+  ${CUDA_RT_TARGET}
 )
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index bd657b800c61e8..d4a473d58e86cd 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -15,7 +15,7 @@
 #include "flang/ISO_Fortran_binding_wrapper.h"
 #include "flang/Runtime/allocator-registry.h"
 
-#include "cuda.h"
+#include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
 extern "C" {
@@ -34,32 +34,28 @@ void RTDEF(CUFRegisterAllocator)() {
 
 void *CUFAllocPinned(std::size_t sizeInBytes) {
-  void *p;
-  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+  void *p = nullptr;
+  CUDA_REPORT_IF_ERROR(cudaMallocHost(&p, sizeInBytes));
   return p;
 }
 
-void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
 
 void *CUFAllocDevice(std::size_t sizeInBytes) {
-  CUdeviceptr p = 0;
-  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
-  return reinterpret_cast<void *>(p);
+  void *p = nullptr;
+  CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+  return p;
 }
 
-void CUFFreeDevice(void *p) {
-  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
-}
+void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
 
 void *CUFAllocManaged(std::size_t sizeInBytes) {
-  CUdeviceptr p = 0;
+  void *p = nullptr;
   CUDA_REPORT_IF_ERROR(
-      cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
-  return reinterpret_cast<void *>(p);
+      cudaMallocManaged(&p, sizeInBytes, cudaMemAttachGlobal));
+  return p;
 }
 
-void CUFFreeManaged(void *p) {
-  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
-}
+void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
 
 void *CUFAllocUnified(std::size_t sizeInBytes) {
   // Call alloc managed for the time being.
diff --git a/flang/test/Driver/fveclib-codegen.f90 b/flang/test/Driver/fveclib-codegen.f90
index 8d7d3af1e8f9ba..3a96c29ac70854 100644
--- a/flang/test/Driver/fveclib-codegen.f90
+++ b/flang/test/Driver/fveclib-codegen.f90
@@ -1,6 +1,7 @@
 ! test that -fveclib= is passed to the backend
 ! -target aarch64 so that ArmPL is available
-! RUN: %flang -S -Ofast -fveclib=LIBMVEC -o - %s | FileCheck %s
+! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %}
+! RUN: %if x86-registered-target %{ %flang -S -Ofast -target x86_64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %}
 ! RUN: %flang -S -Ofast -fveclib=NoLibrary -o - %s | FileCheck %s --check-prefix=NOLIB
 
 subroutine sb(a, b)
diff --git a/flang/test/Lower/default-initialization-globals.f90 b/flang/test/Lower/default-initialization-globals.f90
index 384d1cb763ad67..e9611dab467cba 100644
--- a/flang/test/Lower/default-initialization-globals.f90
+++ b/flang/test/Lower/default-initialization-globals.f90
@@ -1,5 +1,5 @@
 ! Test default initialization of global variables (static init)
-! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if system-aix %{"CHECK","CHECK-BE"%} \
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if target={{.*-aix.*|sparc.*}} %{"CHECK","CHECK-BE"%} \
 ! RUN:                                         %else %{"CHECK","CHECK-LE"%}
 
 module tinit
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 9f5ec289ee8f74..b51ff0ac006cc6 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -14,7 +14,7 @@
 #include "flang/Runtime/allocatable.h"
 #include "flang/Runtime/allocator-registry.h"
 
-#include "cuda.h"
+#include "cuda_runtime.h"
 
 using namespace Fortran::runtime;
 using namespace Fortran::runtime::cuda;
@@ -25,38 +25,9 @@ static OwningPtr<Descriptor> createAllocatable(
       CFI_attribute_allocatable);
 }
 
-thread_local static int32_t defaultDevice = 0;
-
-CUdevice getDefaultCuDevice() {
-  CUdevice device;
-  CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
-  return device;
-}
-
-class ScopedContext {
-public:
-  ScopedContext() {
-    // Static reference to CUDA primary context for device ordinal
-    // defaultDevice.
-    static CUcontext context = [] {
-      CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
-      CUcontext ctx;
-      // Note: this does not affect the current context.
-      CUDA_REPORT_IF_ERROR(
-          cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
-      return ctx;
-    }();
-
-    CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
-  }
-
-  ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
-};
-
 TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // REAL(4), DEVICE, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Real, 4)};
   a->SetAllocIdx(kDeviceAllocatorPos);
@@ -74,7 +45,6 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
 TEST(AllocatableCUFTest, SimplePinnedAllocate) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Integer, 4)};
   EXPECT_FALSE(a->HasAddendum());
@@ -93,7 +63,6 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
 TEST(AllocatableCUFTest, DescriptorAllocationTest) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // REAL(4), DEVICE, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Real, 4)};
   Descriptor *desc = nullptr;
diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 207f8e4df45413..2b97d9a5bc745b 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -14,7 +14,7 @@
 // XFAIL: availability-tzdb-missing
 
 // TODO TZDB Investigate
-// XFAIL: target={{armv(7|8)l-linux-gnueabihf}}
+// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
 
 #include <chrono>
 #include <format>
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index c27ab2b67dc2b2..7d26fa9aea74ab 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1639,14 +1639,14 @@ void RelocationBaseSection::addSymbolReloc(
 }
 
 void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
-    RelType dynType, GotSection &sec, uint64_t offsetInSec, Symbol &sym,
+    RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
     RelType addendRelType) {
   // No need to write an addend to the section for preemptible symbols.
   if (sym.isPreemptible)
-    addReloc({dynType, &sec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0,
+    addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0,
               R_ABS});
   else
-    addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, sec, offsetInSec,
+    addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec,
              sym, 0, R_ABS, addendRelType);
 }
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index d4169e1e1acaf6..43eb82cbb3e28b 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -523,7 +523,8 @@ class RelocationBaseSection : public SyntheticSection {
   }
   /// Add a dynamic relocation using the target address of \p sym as the addend
   /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
-  void addAddendOnlyRelocIfNonPreemptible(RelType dynType, GotSection &sec,
+  void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
+                                          InputSectionBase &isec,
                                           uint64_t offsetInSec, Symbol &sym,
                                           RelType addendRelType);
   template <bool shard = false>
diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 18b7521ed1ad2a..b9f7592fa9c663 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -851,7 +851,6 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -867,6 +866,7 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -911,7 +911,6 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -927,6 +926,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -952,7 +952,6 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
                                bodyData, infoCategoryWriter.catListInfo.align);
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
   newCatList->live = true;
-  addInputSection(newCatList);
 
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
 
@@ -968,6 +967,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
 
   catListSym->used = true;
   objFile->symbols.push_back(catListSym);
+  addInputSection(newCatList);
   return catListSym;
 }
 
@@ -990,7 +990,6 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
                                bodyData, infoCategoryWriter.catBodyInfo.align);
   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
   newBodySec->live = true;
-  addInputSection(newBodySec);
 
   std::string symName =
       objc::symbol_names::category + baseClassName + "(" + name + ")";
@@ -1003,6 +1002,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
 
   catBodySym->used = true;
   objFile->symbols.push_back(catBodySym);
+  addInputSection(newBodySec);
 
   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -1223,7 +1223,6 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
           infoCategoryWriter.catListInfo.align);
       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
       listSec->live = true;
-      addInputSection(listSec);
 
       std::string slotSymName = "<__objc_catlist slot for category ";
       slotSymName += nonErasedCatBody->getName();
@@ -1238,6 +1237,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
 
       catListSlotSym->used = true;
       objFile->symbols.push_back(catListSlotSym);
+      addInputSection(listSec);
 
       // Now link the category body into the newly created slot
       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
diff --git a/lld/test/MachO/objc-category-merging-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s
index 527493303c583e..b94799a57a4d85 100644
--- a/lld/test/MachO/objc-category-merging-minimal.s
+++ b/lld/test/MachO/objc-category-merging-minimal.s
@@ -9,7 +9,7 @@
 ## Create our main testing dylib - linking against the fake dylib above
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal.o merge_cat_minimal.s
 # RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_no_merge.dylib a64_fakedylib.dylib merge_cat_minimal.o
-# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
+# RUN: %lld -objc_relative_method_lists -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
 
-## Now verify that the flag caused category merging to happen appropriatelly
+## Now verify that the flag caused category merging to happen appropriately
 # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS
@@ -17,7 +17,7 @@
 
 ############ Test merging multiple categories into the base class ############
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s
-# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
+# RUN: %lld -arch arm64 -dylib -objc_relative_method_lists -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
 # RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o
 
 # RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib  | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE
@@ -37,14 +37,14 @@ MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02)
 MERGE_CATS-NEXT:   name {{.*}} Category01|Category02
 MERGE_CATS:       instanceMethods
-MERGE_CATS-NEXT:  24
-MERGE_CATS-NEXT:  2
+MERGE_CATS-NEXT:  entsize 12 (relative)
+MERGE_CATS-NEXT:  count 2
 MERGE_CATS-NEXT:   name {{.*}} cat01_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 MERGE_CATS-NEXT:   name {{.*}} cat02_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 MERGE_CATS-NEXT:         classMethods 0x0
 MERGE_CATS-NEXT:            protocols 0x0
 MERGE_CATS-NEXT:   instanceProperties 0x0
@@ -69,17 +69,17 @@ YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE: baseMethods
-YES_MERGE_INTO_BASE-NEXT: entsize 24
+YES_MERGE_INTO_BASE-NEXT: entsize 12 (relative)
 YES_MERGE_INTO_BASE-NEXT: count 3
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass baseInstanceMethod]
 
 
 #### Check merge swift category into base class ###
diff --git a/lldb/CodeOwners.rst b/lldb/CodeOwners.rst
index 52e3e550523e5b..3c10c2a28da9e7 100644
--- a/lldb/CodeOwners.rst
+++ b/lldb/CodeOwners.rst
@@ -17,7 +17,7 @@ assistance.
 All parts of LLDB not covered by someone else
 ----------------------------------------------
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 Components
 ----------
@@ -27,100 +27,100 @@ LLDB.
 ABI
 ~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 
 Breakpoint
 ~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 CMake & Build System
 ~~~~~~~~~~~~~~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Alex Langford
-| alangford\@apple.com (email), bulbazord (Phabricator), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord)
+| alangford\@apple.com (email), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord)
 
 Commands
 ~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Expression Parser
 ~~~~~~~~~~~~~~~~~
 | Michael Buch
-| michaelbuch12\@gmail.com (email), Michael137 (Phabricator), Michael137 (GitHub), Michael137 (Discourse)
+| michaelbuch12\@gmail.com (email), Michael137 (GitHub), Michael137 (Discourse)
 
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Interpreter
 ~~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 
 Lua
 ~~~
-| Jonas Delvieghere
+| Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 Python
 ~~~~~~
 | Med Ismail Bennani
-| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
+| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
 
 Target/Process Control
 ~~~~~~~~~~~~~~~~~~~~~~
 | Med Ismail Bennani
-| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
+| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
 
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Test Suite
 ~~~~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 Trace
 ~~~~~
 | Walter Erquinigo
-| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
+| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
 
 Unwinding
 ~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 Utility
 ~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 ValueObject
 ~~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Watchpoints
 ~~~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 File Formats
 ------------
@@ -130,54 +130,54 @@ info formats.
 (PE)COFF
 ~~~~~~~~
 | Saleem Abdulrasool
-| compnerd\@compnerd.org (email), compnerd (Phabricator), compnerd (GitHub), compnerd (Discourse), compnerd (Discord)
+| compnerd\@compnerd.org (email), compnerd (GitHub), compnerd (Discourse), compnerd (Discord)
 
 Breakpad
 ~~~~~~~~
 | Zequan Wu
-| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse)
+| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 CTF
 ~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 DWARF
 ~~~~~
 | Adrian Prantl
-| aprantl\@apple.com (email), aprantl (Phabricator), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse)
+| aprantl\@apple.com (email), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse)
 
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 ELF
 ~~~
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 JSON
 ~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 MachO
 ~~~~~
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 PDB
 ~~~
 | Zequan Wu
-| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse)
+| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse)
 
 Platforms
 ---------
@@ -186,36 +186,36 @@ The following people are responsible for decisions involving platforms.
 Android
 ~~~~~~~
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 Darwin
 ~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 FreeBSD
 ~~~~~~~
 | Ed Maste
-| emaste\@freebsd.org (email), emaste (Phabricator), emaste (GitHub), emaste (Discourse), emaste (Discord)
+| emaste\@freebsd.org (email), emaste (GitHub), emaste (Discourse), emaste (Discord)
 
 Linux
 ~~~~~
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 Windows
 ~~~~~~~
 | Omair Javaid
-| omair.javaid\@linaro.org (email), omjavaid (Phabricator), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord)
+| omair.javaid\@linaro.org (email), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord)
 
 Tools
 -----
@@ -224,23 +224,23 @@ The following people are responsible for decisions involving specific tools.
 debugserver
 ~~~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 lldb-server
 ~~~~~~~~~~~
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 lldb-dap
 ~~~~~~~~
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 | Walter Erquinigo
-| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
+| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
 
 Former Code Owners
 ==================
diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
index b714f7be187aca..d27bd1b7426e6c 100644
--- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
+++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
@@ -128,7 +128,7 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) {
 static bool ResolveAndVerifyCandidateSupportDir(FileSpec &path) {
   FileSystem::Instance().Resolve(path);
   return FileSystem::Instance().IsDirectory(path);
-};
+}
 
 bool HostInfoMacOSX::ComputeSupportExeDirectory(FileSpec &file_spec) {
   FileSpec lldb_file_spec = GetShlibDir();
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
index aef88bc43b224d..6b64d6f5916673 100644
--- a/llvm/docs/DirectX/DXILResources.rst
+++ b/llvm/docs/DirectX/DXILResources.rst
@@ -162,6 +162,10 @@ the subsequent ``dx.op.annotateHandle`` operation in. Note that we don't have
 an analogue for `dx.op.createHandle`_, since ``dx.op.createHandleFromBinding``
 subsumes it.
 
+For simplicity of lowering, we match DXIL in using an index from the beginning
+of the binding space rather than an index from the lower bound of the binding
+itself.
+
 .. _dx.op.createHandle: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#resource-handles
 
 .. list-table:: ``@llvm.dx.handle.fromBinding``
@@ -190,7 +194,7 @@ subsumes it.
    * - ``%index``
      - 4
      - ``i32``
-     - Index of the resource to access.
+     - Index from the beginning of the binding space to access.
    * - ``%non-uniform``
      - 5
      - i1
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 0ee4d7b444cfcf..5e5e9b9e8a93b1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16131,6 +16131,96 @@ The returned value is completely identical to the input except for the sign bit;
 in particular, if the input is a NaN, then the quiet/signaling bit and payload
 are perfectly preserved.
 
+.. _i_fminmax_family:
+
+'``llvm.min.*``' Intrinsics Comparison
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Standard:
+"""""""""
+
+IEEE754 and ISO C define some min/max operations, and they have some differences
+on working with qNaN/sNaN and +0.0/-0.0. Here is the list:
+
+.. list-table::
+   :header-rows: 2
+
+   * - ``ISO C``
+     - fmin/fmax
+     - fminimum/fmaximum
+     - fminimum_num/fmaximum_num
+
+   * - ``IEEE754``
+     - minNum/maxNum (2008)
+     - minimum/maximum (2019)
+     - minimumNumber/maximumNumber (2019)
+
+   * - ``+0.0 vs -0.0``
+     - either one
+     - +0.0 > -0.0
+     - +0.0 > -0.0
+
+   * - ``NUM vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - NUM, invalid exception
+
+   * - ``qNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``NUM vs qNaN``
+     - NUM, no exception
+     - qNaN, no exception
+     - NUM, no exception
+
+LLVM Implementation:
+""""""""""""""""""""
+
+LLVM implements all ISO C flavors as listed in the table below, except that
+in the default floating-point environment exceptions are ignored. The
+constrained versions of the intrinsics respect the exception behavior.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 16 28 28 28
+
+   * - Operation
+     - minnum/maxnum
+     - minimum/maximum
+     - minimumnum/maximumnum
+
+   * - ``NUM vs qNaN``
+     - NUM, no exception
+     - qNaN, no exception
+     - NUM, no exception
+
+   * - ``NUM vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - NUM, invalid exception
+
+   * - ``qNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``sNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``+0.0 vs -0.0``
+     - either one
+     - +0.0(max)/-0.0(min)
+     - +0.0(max)/-0.0(min)
+
+   * - ``NUM vs NUM``
+     - larger(max)/smaller(min)
+     - larger(max)/smaller(min)
+     - larger(max)/smaller(min)
+
 .. _i_minnum:
 
 '``llvm.minnum.*``' Intrinsic
@@ -16312,6 +16402,98 @@ of the two arguments. -0.0 is considered to be less than +0.0 for this
 intrinsic. Note that these are the semantics specified in the draft of
 IEEE 754-2019.
 
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.minimumnum`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.minimumnum.f32(float %Val0, float %Val1)
+      declare double    @llvm.minimumnum.f64(double %Val0, double %Val1)
+      declare x86_fp80  @llvm.minimumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+      declare fp128     @llvm.minimumnum.f128(fp128 %Val0, fp128 %Val1)
+      declare ppc_fp128 @llvm.minimumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+Overview:
+"""""""""
+
+The '``llvm.minimumnum.*``' intrinsics return the minimum of the two
+arguments, not propagating NaNs and treating -0.0 as less than +0.0.
+
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating-point numbers of the same
+type.
+
+Semantics:
+""""""""""
+If both operands are NaNs (including sNaN), returns qNaN. If one operand
+is NaN (including sNaN) and the other operand is a number, returns the number.
+Otherwise returns the lesser of the two arguments. -0.0 is considered to
+be less than +0.0 for this intrinsic.
+
+Note that these are the semantics of minimumNumber specified in IEEE 754-2019.
+
+It has some differences with '``llvm.minnum.*``':
+1)'``llvm.minnum.*``' will return qNaN if either operand is sNaN.
+2)'``llvm.minnum.*``' may return either one if we compare +0.0 vs -0.0.
+
+.. _i_maximumnum:
+
+'``llvm.maximumnum.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.maximumnum`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.maximumnum.f32(float %Val0, float %Val1)
+      declare double    @llvm.maximumnum.f64(double %Val0, double %Val1)
+      declare x86_fp80  @llvm.maximumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+      declare fp128     @llvm.maximumnum.f128(fp128 %Val0, fp128 %Val1)
+      declare ppc_fp128 @llvm.maximumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+Overview:
+"""""""""
+
+The '``llvm.maximumnum.*``' intrinsics return the maximum of the two
+arguments, not propagating NaNs and treating -0.0 as less than +0.0.
+
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating-point numbers of the same
+type.
+
+Semantics:
+""""""""""
+If both operands are NaNs (including sNaN), returns qNaN. If one operand
+is NaN (including sNaN) and the other operand is a number, returns the number.
+Otherwise returns the greater of the two arguments. -0.0 is considered to
+be less than +0.0 for this intrinsic.
+
+Note that these are the semantics of maximumNumber specified in IEEE 754-2019.
+
+It has some differences with '``llvm.maxnum.*``':
+1)'``llvm.maxnum.*``' will return qNaN if either operand is sNaN.
+2)'``llvm.maxnum.*``' may return either one if we compare +0.0 vs -0.0.
+
 .. _int_copysign:
 
 '``llvm.copysign.*``' Intrinsic
diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 231de56ef4cfee..41d1388e5bf7e9 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -164,20 +164,20 @@ rarely have to include this file directly).
   efficient to use the ``InstVisitor`` class to dispatch over the instruction
   type directly.
 
-``isa_and_nonnull<>``:
-  The ``isa_and_nonnull<>`` operator works just like the ``isa<>`` operator,
+``isa_and_present<>``:
+  The ``isa_and_present<>`` operator works just like the ``isa<>`` operator,
   except that it allows for a null pointer as an argument (which it then
   returns false).  This can sometimes be useful, allowing you to combine several
   null checks into one.
 
-``cast_or_null<>``:
-  The ``cast_or_null<>`` operator works just like the ``cast<>`` operator,
+``cast_if_present<>``:
+  The ``cast_if_present<>`` operator works just like the ``cast<>`` operator,
   except that it allows for a null pointer as an argument (which it then
   propagates).  This can sometimes be useful, allowing you to combine several
   null checks into one.
 
-``dyn_cast_or_null<>``:
-  The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>``
+``dyn_cast_if_present<>``:
+  The ``dyn_cast_if_present<>`` operator works just like the ``dyn_cast<>``
   operator, except that it allows for a null pointer as an argument (which it
   then propagates).  This can sometimes be useful, allowing you to combine
   several null checks into one.
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index d77c81d03582e1..f0e2aeb0f92f74 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -9,10 +9,10 @@
 #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
 #define LLVM_ANALYSIS_CTXPROFANALYSIS_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
-#include <map>
 
 namespace llvm {
 
@@ -20,12 +20,28 @@ class CtxProfAnalysis;
 
 /// The instrumented contextual profile, produced by the CtxProfAnalysis.
 class PGOContextualProfile {
+  friend class CtxProfAnalysis;
+  friend class CtxProfAnalysisPrinterPass;
+  struct FunctionInfo {
+    uint32_t NextCounterIndex = 0;
+    uint32_t NextCallsiteIndex = 0;
+    const std::string Name;
+
+    FunctionInfo(StringRef Name) : Name(Name) {}
+  };
   std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
+  // For the GUIDs in this module, associate metadata about each function which
+  // we'll need when we maintain the profiles during IPO transformations.
+  DenseMap<GlobalValue::GUID, FunctionInfo> FuncInfo;
 
-public:
-  explicit PGOContextualProfile(PGOCtxProfContext::CallTargetMapTy &&Profiles)
-      : Profiles(std::move(Profiles)) {}
+  /// Get the GUID of this Function if it's defined in this module.
+  GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const;
+
+  // This is meant to be constructed from CtxProfAnalysis, which will also set
+  // its state piecemeal.
   PGOContextualProfile() = default;
+
+public:
   PGOContextualProfile(const PGOContextualProfile &) = delete;
   PGOContextualProfile(PGOContextualProfile &&) = default;
 
@@ -35,6 +51,20 @@ class PGOContextualProfile {
     return *Profiles;
   }
 
+  bool isFunctionKnown(const Function &F) const {
+    return getDefinedFunctionGUID(F) != 0;
+  }
+
+  uint32_t allocateNextCounterIndex(const Function &F) {
+    assert(isFunctionKnown(F));
+    return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++;
+  }
+
+  uint32_t allocateNextCallsiteIndex(const Function &F) {
+    assert(isFunctionKnown(F));
+    return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++;
+  }
+
   bool invalidate(Module &, const PreservedAnalyses &PA,
                   ModuleAnalysisManager::Invalidator &) {
     // Check whether the analysis has been explicitly invalidated. Otherwise,
@@ -66,5 +96,27 @@ class CtxProfAnalysisPrinterPass
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
   static bool isRequired() { return true; }
 };
+
+/// Assign a GUID to functions as metadata. GUID calculation takes linkage into
+/// account, which may change especially through and after thinlto. By
+/// pre-computing and assigning as metadata, this mechanism is resilient to such
+/// changes (as well as name changes e.g. suffix ".llvm." additions).
+
+// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early in
+// the pass pipeline, associate it with any Global Value, and then use it for
+// PGO and ThinLTO.
+// At that point, this should be moved elsewhere.
+class AssignGUIDPass : public PassInfoMixin<AssignGUIDPass> {
+public:
+  explicit AssignGUIDPass() = default;
+
+  /// Assign a GUID *if* one is not already assigned, as a function metadata
+  /// named `GUIDMetadataName`.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+  static const char *GUIDMetadataName;
+  // This should become GlobalValue::getGUID
+  static uint64_t getGUID(const Function &F);
+};
+
 } // namespace llvm
 #endif // LLVM_ANALYSIS_CTXPROFANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 7be5bb04549c61..e1cb1e5c557eae 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1388,6 +1388,39 @@ TLI_DEFINE_ENUM_INTERNAL(fminl)
 TLI_DEFINE_STRING_INTERNAL("fminl")
 TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
 
+// Calls to fmaximum_num and fminimum_num library functions expand to the llvm.maximumnum and
+// llvm.minimumnum intrinsics with the correct parameter types for the arguments
+// (all types must match).
+/// double fmaximum_num(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_num)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_num")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// float fmaximum_numf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_numf)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_numf")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// long double fmaximum_numl(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_numl)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_numl")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// double fminimum_num(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_num)
+TLI_DEFINE_STRING_INTERNAL("fminimum_num")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// float fminimum_numf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_numf)
+TLI_DEFINE_STRING_INTERNAL("fminimum_numf")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// long double fminimum_numl(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_numl)
+TLI_DEFINE_STRING_INTERNAL("fminimum_numl")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
 /// double fmod(double x, double y);
 TLI_DEFINE_ENUM_INTERNAL(fmod)
 TLI_DEFINE_STRING_INTERNAL("fmod")
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 279cfb5aa47d6f..77ddc10e8a0e76 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2037,6 +2037,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::maximum:
       ISD = ISD::FMAXIMUM;
       break;
+    case Intrinsic::minimumnum:
+      ISD = ISD::FMINIMUMNUM;
+      break;
+    case Intrinsic::maximumnum:
+      ISD = ISD::FMAXIMUMNUM;
+      break;
     case Intrinsic::copysign:
       ISD = ISD::FCOPYSIGN;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 7305e3086fcd65..b8f8818a749528 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1047,6 +1047,11 @@ enum NodeType {
   FMINIMUM,
   FMAXIMUM,
 
+  /// FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as
+  /// FMINNUM_IEEE and FMAXNUM_IEEE except when either operand is sNaN.
+  FMINIMUMNUM,
+  FMAXIMUMNUM,
+
   /// FSINCOS - Compute both fsin and fcos as a single operation.
   FSINCOS,
 
diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 96ece1559bc437..88ddd43a2a8913 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -737,6 +737,14 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_VScale(const Opnd &Op) {
   return UnaryOpc_match<Opnd>(ISD::VSCALE, Op);
 }
 
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_FPToUI(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::FP_TO_UINT, Op);
+}
+
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_FPToSI(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::FP_TO_SINT, Op);
+}
+
 // === Constants ===
 struct ConstantInt_match {
   APInt *BindVal;
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index deb1d04df3400c..eda38cd8a564d6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2908,6 +2908,8 @@ class TargetLoweringBase {
     case ISD::FMAXNUM_IEEE:
     case ISD::FMINIMUM:
     case ISD::FMAXIMUM:
+    case ISD::FMINIMUMNUM:
+    case ISD::FMAXIMUMNUM:
     case ISD::AVGFLOORS:
     case ISD::AVGFLOORU:
     case ISD::AVGCEILS:
@@ -5283,6 +5285,9 @@ class TargetLowering : public TargetLoweringBase {
   /// Expand fminimum/fmaximum into multiple comparison with selects.
   SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand fminimumnum/fmaximumnum into multiple comparison with selects.
+  SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
   /// \param N Node to expand
   /// \returns The expansion result
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 795cd05ea5b5e2..1185939cd9c75b 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -49,51 +49,11 @@ class StructLayout;
 class Triple;
 class Value;
 
-/// Enum used to categorize the alignment types stored by LayoutAlignElem
-enum AlignTypeEnum {
-  INTEGER_ALIGN = 'i',
-  VECTOR_ALIGN = 'v',
-  FLOAT_ALIGN = 'f',
-  AGGREGATE_ALIGN = 'a'
-};
-
 // FIXME: Currently the DataLayout string carries a "preferred alignment"
 // for types. As the DataLayout is module/global, this should likely be
 // sunk down to an FTTI element that is queried rather than a global
 // preference.
 
-/// Layout alignment element.
-///
-/// Stores the alignment data associated with a given type bit width.
-struct LayoutAlignElem {
-  uint32_t TypeBitWidth;
-  Align ABIAlign;
-  Align PrefAlign;
-
-  static LayoutAlignElem get(Align ABIAlign, Align PrefAlign,
-                             uint32_t BitWidth);
-
-  bool operator==(const LayoutAlignElem &rhs) const;
-};
-
-/// Layout pointer alignment element.
-///
-/// Stores the alignment data associated with a given pointer and address space.
-struct PointerAlignElem {
-  uint32_t AddressSpace;
-  uint32_t TypeBitWidth;
-  Align ABIAlign;
-  Align PrefAlign;
-  uint32_t IndexBitWidth;
-
-  /// Initializer
-  static PointerAlignElem getInBits(uint32_t AddressSpace, Align ABIAlign,
-                                    Align PrefAlign, uint32_t TypeBitWidth,
-                                    uint32_t IndexBitWidth);
-
-  bool operator==(const PointerAlignElem &rhs) const;
-};
-
 /// A parsed version of the target data layout string in and methods for
 /// querying it.
 ///
@@ -102,6 +62,26 @@ struct PointerAlignElem {
 /// target being codegen'd to.
 class DataLayout {
 public:
+  /// Primitive type specification.
+  struct PrimitiveSpec {
+    uint32_t BitWidth;
+    Align ABIAlign;
+    Align PrefAlign;
+
+    bool operator==(const PrimitiveSpec &Other) const;
+  };
+
+  /// Pointer type specification.
+  struct PointerSpec {
+    uint32_t AddrSpace;
+    uint32_t BitWidth;
+    Align ABIAlign;
+    Align PrefAlign;
+    uint32_t IndexBitWidth;
+
+    bool operator==(const PointerSpec &Other) const;
+  };
+
   enum class FunctionPtrAlignType {
     /// The function pointer alignment is independent of the function alignment.
     Independent,
@@ -135,20 +115,26 @@ class DataLayout {
   // FIXME: `unsigned char` truncates the value parsed by `parseSpecifier`.
   SmallVector<unsigned char, 8> LegalIntWidths;
 
-  // Primitive type specifications. Sorted and uniqued by type bit width.
-  SmallVector<LayoutAlignElem, 6> IntAlignments;
-  SmallVector<LayoutAlignElem, 4> FloatAlignments;
-  SmallVector<LayoutAlignElem, 10> VectorAlignments;
+  /// Type specifier used by some internal functions.
+  enum class TypeSpecifier {
+    Integer = 'i',
+    Float = 'f',
+    Vector = 'v',
+    Aggregate = 'a'
+  };
 
-  // Pointer type specifications. Sorted and uniqued by address space number.
-  SmallVector<PointerAlignElem, 8> Pointers;
+  /// Primitive type specifications. Sorted and uniqued by type bit width.
+  SmallVector<PrimitiveSpec, 6> IntSpecs;
+  SmallVector<PrimitiveSpec, 4> FloatSpecs;
+  SmallVector<PrimitiveSpec, 10> VectorSpecs;
+
+  /// Pointer type specifications. Sorted and uniqued by address space number.
+  SmallVector<PointerSpec, 8> PointerSpecs;
 
   /// The string representation used to create this DataLayout
   std::string StringRepresentation;
 
-  const PointerAlignElem &getPointerAlignElem(uint32_t AddressSpace) const;
-
-  // Struct type ABI and preferred alignments. The default spec is "a:8:64".
+  /// Struct type ABI and preferred alignments. The default spec is "a:8:64".
   Align StructABIAlignment = Align::Constant<1>();
   Align StructPrefAlignment = Align::Constant<8>();
 
@@ -159,16 +145,19 @@ class DataLayout {
   /// well-defined bitwise representation.
   SmallVector<unsigned, 8> NonIntegralAddressSpaces;
 
-  /// Attempts to set the alignment of the given type. Returns an error
-  /// description on failure.
-  Error setAlignment(AlignTypeEnum AlignType, Align ABIAlign, Align PrefAlign,
-                     uint32_t BitWidth);
+  /// Attempts to set the specification for the given type.
+  /// Returns an error description on failure.
+  Error setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth,
+                         Align ABIAlign, Align PrefAlign);
+
+  /// Searches for a pointer specification that matches the given address space.
+  /// Returns the default address space specification if not found.
+  const PointerSpec &getPointerSpec(uint32_t AddrSpace) const;
 
-  /// Attempts to set the alignment of a pointer in the given address space.
+  /// Attempts to set the specification for pointer in the given address space.
   /// Returns an error description on failure.
-  Error setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
-                                  Align PrefAlign, uint32_t TypeBitWidth,
-                                  uint32_t IndexBitWidth);
+  Error setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
+                       Align PrefAlign, uint32_t IndexBitWidth);
 
   /// Internal helper to get alignment for integer of given bitwidth.
   Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -375,7 +364,7 @@ class DataLayout {
   /// FIXME: The defaults need to be removed once all of
   /// the backends/clients are updated.
   unsigned getPointerSizeInBits(unsigned AS = 0) const {
-    return getPointerAlignElem(AS).TypeBitWidth;
+    return getPointerSpec(AS).BitWidth;
   }
 
   /// Returns the maximum index size over all address spaces.
@@ -385,7 +374,7 @@ class DataLayout {
 
   /// Size in bits of index used for address calculation in getelementptr.
   unsigned getIndexSizeInBits(unsigned AS) const {
-    return getPointerAlignElem(AS).IndexBitWidth;
+    return getPointerSpec(AS).IndexBitWidth;
   }
 
   /// Layout pointer size, in bits, based on the type.  If this function is
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 3f3d75012c6945..0dbcbc0b2cb76f 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1015,6 +1015,18 @@ class IRBuilderBase {
     return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
   }
 
+  /// Create call to the minimumnum intrinsic.
+  Value *CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateBinaryIntrinsic(Intrinsic::minimumnum, LHS, RHS, nullptr,
+                                 Name);
+  }
+
+  /// Create call to the maximumnum intrinsic.
+  Value *CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateBinaryIntrinsic(Intrinsic::maximumnum, LHS, RHS, nullptr,
+                                 Name);
+  }
+
   /// Create call to the copysign intrinsic.
   Value *CreateCopySign(Value *LHS, Value *RHS,
                         Instruction *FMFSource = nullptr,
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 94c8fa092f45e6..2f1e2c08c3ecec 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -76,6 +76,8 @@ class IntrinsicInst : public CallInst {
     case Intrinsic::minnum:
     case Intrinsic::maximum:
     case Intrinsic::minimum:
+    case Intrinsic::maximumnum:
+    case Intrinsic::minimumnum:
     case Intrinsic::smax:
     case Intrinsic::smin:
     case Intrinsic::umax:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b4e758136b39fb..0841273fd2e1e5 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1085,6 +1085,14 @@ def int_maximum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>],
   [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
 >;
+def int_minimumnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
+def int_maximumnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
 
 // Internal interface for object size checking
 def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty],
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 89aaf6d1ad83f8..c3d5ef9f4e4f82 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -299,6 +299,16 @@ HANDLE_LIBCALL(FMAX_F64, "fmax")
 HANDLE_LIBCALL(FMAX_F80, "fmaxl")
 HANDLE_LIBCALL(FMAX_F128, "fmaxl")
 HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+HANDLE_LIBCALL(FMINIMUMNUM_F32, "fminimum_numf")
+HANDLE_LIBCALL(FMINIMUMNUM_F64, "fminimum_num")
+HANDLE_LIBCALL(FMINIMUMNUM_F80, "fminimum_numl")
+HANDLE_LIBCALL(FMINIMUMNUM_F128, "fminimum_numl")
+HANDLE_LIBCALL(FMINIMUMNUM_PPCF128, "fminimum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_F32, "fmaximum_numf")
+HANDLE_LIBCALL(FMAXIMUMNUM_F64, "fmaximum_num")
+HANDLE_LIBCALL(FMAXIMUMNUM_F80, "fmaximum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_F128, "fmaximum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_PPCF128, "fmaximum_numl")
 HANDLE_LIBCALL(LROUND_F32, "lroundf")
 HANDLE_LIBCALL(LROUND_F64, "lround")
 HANDLE_LIBCALL(LROUND_F80, "lroundl")
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index e40ad2062166ea..172deffbd31771 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -517,6 +517,10 @@ def fminimum   : SDNode<"ISD::FMINIMUM"   , SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
 def fmaximum   : SDNode<"ISD::FMAXIMUM"   , SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
+def fminimumnum   : SDNode<"ISD::FMINIMUMNUM"   , SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def fmaximumnum   : SDNode<"ISD::FMAXIMUMNUM"   , SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
 def fgetsign   : SDNode<"ISD::FGETSIGN"   , SDTFPToIntOp>;
 def fcanonicalize : SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>;
 def fneg       : SDNode<"ISD::FNEG"       , SDTFPUnaryOp>;
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index fbae705127538a..7b4666b29a1936 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Analysis.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
@@ -64,10 +65,39 @@ Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) {
 } // namespace json
 } // namespace llvm
 
+const char *AssignGUIDPass::GUIDMetadataName = "guid";
+
+PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  for (auto &F : M.functions()) {
+    if (F.isDeclaration())
+      continue;
+    if (F.getMetadata(GUIDMetadataName))
+      continue;
+    const GlobalValue::GUID GUID = F.getGUID();
+    F.setMetadata(GUIDMetadataName,
+                  MDNode::get(M.getContext(),
+                              {ConstantAsMetadata::get(ConstantInt::get(
+                                  Type::getInt64Ty(M.getContext()), GUID))}));
+  }
+  return PreservedAnalyses::none();
+}
+
+GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {
+  if (F.isDeclaration()) {
+    assert(GlobalValue::isExternalLinkage(F.getLinkage()));
+    return GlobalValue::getGUID(F.getGlobalIdentifier());
+  }
+  auto *MD = F.getMetadata(GUIDMetadataName);
+  assert(MD && "guid not found for defined function");
+  return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
+                               ->getValue()
+                               ->stripPointerCasts())
+      ->getZExtValue();
+}
 AnalysisKey CtxProfAnalysis::Key;
 
-CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M,
-                                             ModuleAnalysisManager &MAM) {
+PGOContextualProfile CtxProfAnalysis::run(Module &M,
+                                          ModuleAnalysisManager &MAM) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFile(Profile);
   if (auto EC = MB.getError()) {
     M.getContext().emitError("could not open contextual profile file: " +
@@ -81,7 +111,55 @@ CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M,
                              toString(MaybeCtx.takeError()));
     return {};
   }
-  return Result(std::move(*MaybeCtx));
+
+  PGOContextualProfile Result;
+
+  for (const auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    auto GUID = AssignGUIDPass::getGUID(F);
+    assert(GUID && "guid not found for defined function");
+    const auto &Entry = F.begin();
+    uint32_t MaxCounters = 0; // we expect at least a counter.
+    for (const auto &I : *Entry)
+      if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
+        MaxCounters =
+            static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
+        break;
+      }
+    if (!MaxCounters)
+      continue;
+    uint32_t MaxCallsites = 0;
+    for (const auto &BB : F)
+      for (const auto &I : BB)
+        if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
+          MaxCallsites =
+              static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
+          break;
+        }
+    auto [It, Ins] = Result.FuncInfo.insert(
+        {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
+    (void)Ins;
+    assert(Ins);
+    It->second.NextCallsiteIndex = MaxCallsites;
+    It->second.NextCounterIndex = MaxCounters;
+  }
+  // If we made it this far, the Result is valid - which we mark by setting
+  // .Profiles.
+  // Trim first the roots that aren't in this module.
+  DenseSet<GlobalValue::GUID> ProfiledGUIDs;
+  for (auto &[RootGuid, _] : llvm::make_early_inc_range(*MaybeCtx))
+    if (!Result.FuncInfo.contains(RootGuid))
+      MaybeCtx->erase(RootGuid);
+  Result.Profiles = std::move(*MaybeCtx);
+  return Result;
+}
+
+GlobalValue::GUID
+PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
+  if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
+    return It->first;
+  return 0;
 }
 
 PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
@@ -91,8 +169,16 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
     M.getContext().emitError("Invalid CtxProfAnalysis");
     return PreservedAnalyses::all();
   }
+
+  OS << "Function Info:\n";
+  for (const auto &[Guid, FuncInfo] : C.FuncInfo)
+    OS << Guid << " : " << FuncInfo.Name
+       << ". MaxCounterID: " << FuncInfo.NextCounterIndex
+       << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
+
   const auto JSONed = ::llvm::json::toJSON(C.profiles());
 
+  OS << "\nCurrent Profile:\n";
   OS << formatv("{0:2}", JSONed);
   OS << "\n";
   return PreservedAnalyses::all();
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0aa8b82f533f2a..25644c24855a62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1939,7 +1939,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
   case ISD::FMINIMUM:
-  case ISD::FMAXIMUM:           return visitFMinMax(N);
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:       return visitFMinMax(N);
   case ISD::FCEIL:              return visitFCEIL(N);
   case ISD::FTRUNC:             return visitFTRUNC(N);
   case ISD::FFREXP:             return visitFFREXP(N);
@@ -6068,6 +6070,7 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
   return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
 }
 
+// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
 static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
                                      ISD::CondCode CC, unsigned OrAndOpcode,
                                      SelectionDAG &DAG,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3eadfbf51ddaa1..e7f765382b0e46 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3660,6 +3660,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       Results.push_back(Expanded);
     break;
   }
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM: {
+    Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
+    break;
+  }
   case ISD::FSIN:
   case ISD::FCOS: {
     EVT VT = Node->getValueType(0);
@@ -4539,6 +4544,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::FMAX_F80, RTLIB::FMAX_F128,
                     RTLIB::FMAX_PPCF128, Results);
     break;
+  case ISD::FMINIMUMNUM:
+    ExpandFPLibCall(Node, RTLIB::FMINIMUMNUM_F32, RTLIB::FMINIMUMNUM_F64,
+                    RTLIB::FMINIMUMNUM_F80, RTLIB::FMINIMUMNUM_F128,
+                    RTLIB::FMINIMUMNUM_PPCF128, Results);
+    break;
+  case ISD::FMAXIMUMNUM:
+    ExpandFPLibCall(Node, RTLIB::FMAXIMUMNUM_F32, RTLIB::FMAXIMUMNUM_F64,
+                    RTLIB::FMAXIMUMNUM_F80, RTLIB::FMAXIMUMNUM_F128,
+                    RTLIB::FMAXIMUMNUM_PPCF128, Results);
+    break;
   case ISD::FSQRT:
   case ISD::STRICT_FSQRT:
     ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
@@ -5464,6 +5479,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FMAXNUM:
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case ISD::FPOW:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 0c881d81a2c639..ad0c054d3ccd50 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -74,6 +74,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::STRICT_FMAXNUM:
     case ISD::FMAXNUM:     R = SoftenFloatRes_FMAXNUM(N); break;
+    case ISD::FMINIMUMNUM:    R = SoftenFloatRes_FMINIMUMNUM(N); break;
+    case ISD::FMAXIMUMNUM:    R = SoftenFloatRes_FMAXIMUMNUM(N); break;
     case ISD::STRICT_FADD:
     case ISD::FADD:        R = SoftenFloatRes_FADD(N); break;
     case ISD::STRICT_FACOS:
@@ -323,6 +325,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
                                                RTLIB::FMAX_PPCF128));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINIMUMNUM(SDNode *N) {
+  return SoftenFloatRes_Binary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32,
+                      RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80,
+                      RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXIMUMNUM(SDNode *N) {
+  return SoftenFloatRes_Binary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32,
+                      RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80,
+                      RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
   return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
                                                RTLIB::ADD_F32,
@@ -1404,6 +1420,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FMINNUM:    ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
   case ISD::STRICT_FMAXNUM:
   case ISD::FMAXNUM:    ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+  case ISD::FMINIMUMNUM: ExpandFloatRes_FMINIMUMNUM(N, Lo, Hi); break;
+  case ISD::FMAXIMUMNUM: ExpandFloatRes_FMAXIMUMNUM(N, Lo, Hi); break;
   case ISD::STRICT_FADD:
   case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
   case ISD::STRICT_FACOS:
@@ -1558,6 +1576,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
                                         RTLIB::FMAX_PPCF128), Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  ExpandFloatRes_Binary(
+      N,
+      GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32,
+                   RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80,
+                   RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128),
+      Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  ExpandFloatRes_Binary(
+      N,
+      GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32,
+                   RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80,
+                   RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128),
+      Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
   ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
@@ -2621,6 +2659,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FDIV:
     case ISD::FMAXIMUM:
     case ISD::FMINIMUM:
+    case ISD::FMAXIMUMNUM:
+    case ISD::FMINIMUMNUM:
     case ISD::FMAXNUM:
     case ISD::FMINNUM:
     case ISD::FMAXNUM_IEEE:
@@ -3063,6 +3103,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
   case ISD::FDIV:
   case ISD::FMAXIMUM:
   case ISD::FMINIMUM:
+  case ISD::FMAXIMUMNUM:
+  case ISD::FMINIMUMNUM:
   case ISD::FMAXNUM:
   case ISD::FMINNUM:
   case ISD::FMUL:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3a49a8ff10860a..6de1e3eca7feda 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -567,6 +567,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FATAN(SDNode *N);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
+  SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N);
+  SDValue SoftenFloatRes_FMAXIMUMNUM(SDNode *N);
   SDValue SoftenFloatRes_FADD(SDNode *N);
   SDValue SoftenFloatRes_FCBRT(SDNode *N);
   SDValue SoftenFloatRes_FCEIL(SDNode *N);
@@ -659,6 +661,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FATAN     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMINNUM   (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMAXNUM   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FADD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCBRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCEIL     (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ab12c3b0e728a8..7bf90ceb93cb4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5465,7 +5465,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
     return false;
   }
   case ISD::FMINNUM:
-  case ISD::FMAXNUM: {
+  case ISD::FMAXNUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM: {
     // Only one needs to be known not-nan, since it will be returned if the
     // other ends up being one.
     return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
@@ -6804,6 +6806,10 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
       return getConstantFP(minimum(C1, C2), DL, VT);
     case ISD::FMAXIMUM:
       return getConstantFP(maximum(C1, C2), DL, VT);
+    case ISD::FMINIMUMNUM:
+      return getConstantFP(minimumnum(C1, C2), DL, VT);
+    case ISD::FMAXIMUMNUM:
+      return getConstantFP(maximumnum(C1, C2), DL, VT);
     default: break;
     }
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 37ba62911ec70b..7cdd3d47b641d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6882,6 +6882,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1)), Flags));
     return;
+  case Intrinsic::minimumnum:
+    setValue(&I, DAG.getNode(ISD::FMINIMUMNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)), Flags));
+    return;
+  case Intrinsic::maximumnum:
+    setValue(&I, DAG.getNode(ISD::FMAXIMUMNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)), Flags));
+    return;
   case Intrinsic::copysign:
     setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
@@ -9257,6 +9269,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
         if (visitBinaryFloatCall(I, ISD::FMAXNUM))
           return;
         break;
+      case LibFunc_fminimum_num:
+      case LibFunc_fminimum_numf:
+      case LibFunc_fminimum_numl:
+        if (visitBinaryFloatCall(I, ISD::FMINIMUMNUM))
+          return;
+        break;
+      case LibFunc_fmaximum_num:
+      case LibFunc_fmaximum_numf:
+      case LibFunc_fmaximum_numl:
+        if (visitBinaryFloatCall(I, ISD::FMAXIMUMNUM))
+          return;
+        break;
       case LibFunc_sin:
       case LibFunc_sinf:
       case LibFunc_sinl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 46e8e54ee4ed7d..001f782f209fdb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -203,6 +203,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::STRICT_FMINIMUM:            return "strict_fminimum";
   case ISD::FMAXIMUM:                   return "fmaximum";
   case ISD::STRICT_FMAXIMUM:            return "strict_fmaximum";
+  case ISD::FMINIMUMNUM:                return "fminimumnum";
+  case ISD::FMAXIMUMNUM:                return "fmaximumnum";
   case ISD::FNEG:                       return "fneg";
   case ISD::FSQRT:                      return "fsqrt";
   case ISD::STRICT_FSQRT:               return "strict_fsqrt";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4cf1e655b00990..2c939967a5e1d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8558,6 +8558,94 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
   return MinMax;
 }
 
+SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  unsigned Opc = Node->getOpcode();
+  EVT VT = Node->getValueType(0);
+  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  bool IsMax = Opc == ISD::FMAXIMUMNUM;
+  const TargetOptions &Options = DAG.getTarget().Options;
+  SDNodeFlags Flags = Node->getFlags();
+
+  unsigned NewOp =
+      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+
+  if (isOperationLegalOrCustom(NewOp, VT)) {
+    if (!Flags.hasNoNaNs()) {
+      // Insert canonicalizes if it's possible we need to quiet to get correct
+      // sNaN behavior.
+      if (!DAG.isKnownNeverSNaN(LHS)) {
+        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
+      }
+      if (!DAG.isKnownNeverSNaN(RHS)) {
+        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
+      }
+    }
+
+    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
+  }
+
+  // We can use FMINIMUM/FMAXIMUM if there is no NaN, since they behave the
+  // same in all other cases, +0.0 vs -0.0 included.
+  if (Flags.hasNoNaNs() ||
+      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
+    unsigned IEEE2019Op =
+        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+    if (isOperationLegalOrCustom(IEEE2019Op, VT))
+      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
+  }
+
+  // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return
+  // either one for +0.0 vs -0.0.
+  if ((Flags.hasNoNaNs() ||
+       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
+      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+       DAG.isKnownNeverZeroFloat(RHS))) {
+    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
+    if (isOperationLegalOrCustom(IEEE2008Op, VT))
+      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
+  }
+
+  // If only one operand is NaN, override it with the other operand.
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
+    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
+  }
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
+    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
+  }
+
+  SDValue MinMax =
+      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+  // If MinMax is NaN, let's quiet it.
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
+      !DAG.isKnownNeverNaN(RHS)) {
+    SDValue MinMaxQuiet =
+        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    MinMax =
+        DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
+  }
+
+  // Fixup signed zero behavior.
+  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
+      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
+    return MinMax;
+  }
+  SDValue TestZero =
+      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+  SDValue LCmp = DAG.getSelect(
+      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+      MinMax, Flags);
+  SDValue RCmp = DAG.getSelect(
+      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
+      Flags);
+  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+}
+
 /// Returns a true value if if this FPClassTest can be performed with an ordered
 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
 /// std::nullopt if it cannot be performed as a compare with 0.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 149b5dabee0565..4ff8617f740c89 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -713,6 +713,7 @@ void TargetLoweringBase::initActions() {
                         ISD::FMINNUM,        ISD::FMAXNUM,
                         ISD::FMINNUM_IEEE,   ISD::FMAXNUM_IEEE,
                         ISD::FMINIMUM,       ISD::FMAXIMUM,
+                        ISD::FMINIMUMNUM,    ISD::FMAXIMUMNUM,
                         ISD::FMAD,           ISD::SMIN,
                         ISD::SMAX,           ISD::UMIN,
                         ISD::UMAX,           ISD::ABS,
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83fec194d73904..f9b070e6f1eae4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2022,8 +2022,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
           Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
     }
 
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
 
   addOutlineInfo(std::move(OI));
@@ -7049,8 +7049,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
     }
 
     StaleCI->eraseFromParent();
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
   addOutlineInfo(std::move(OI));
 
@@ -8345,9 +8345,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
                            omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
                        Args);
 
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
-
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
 
   if (!Config.isTargetDevice())
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 530979c75063b4..44cd1e69818953 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -139,53 +139,20 @@ class StructLayoutMap {
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
-// LayoutAlignElem, LayoutAlign support
-//===----------------------------------------------------------------------===//
-
-LayoutAlignElem LayoutAlignElem::get(Align ABIAlign, Align PrefAlign,
-                                     uint32_t BitWidth) {
-  assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
-  LayoutAlignElem retval;
-  retval.ABIAlign = ABIAlign;
-  retval.PrefAlign = PrefAlign;
-  retval.TypeBitWidth = BitWidth;
-  return retval;
-}
-
-bool LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
-  return ABIAlign == rhs.ABIAlign && PrefAlign == rhs.PrefAlign &&
-         TypeBitWidth == rhs.TypeBitWidth;
-}
-
-//===----------------------------------------------------------------------===//
-// PointerAlignElem, PointerAlign support
+//                       DataLayout Class Implementation
 //===----------------------------------------------------------------------===//
 
-PointerAlignElem PointerAlignElem::getInBits(uint32_t AddressSpace,
-                                             Align ABIAlign, Align PrefAlign,
-                                             uint32_t TypeBitWidth,
-                                             uint32_t IndexBitWidth) {
-  assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
-  PointerAlignElem retval;
-  retval.AddressSpace = AddressSpace;
-  retval.ABIAlign = ABIAlign;
-  retval.PrefAlign = PrefAlign;
-  retval.TypeBitWidth = TypeBitWidth;
-  retval.IndexBitWidth = IndexBitWidth;
-  return retval;
+bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
+  return BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign &&
+         PrefAlign == Other.PrefAlign;
 }
 
-bool
-PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
-  return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace &&
-          PrefAlign == rhs.PrefAlign && TypeBitWidth == rhs.TypeBitWidth &&
-          IndexBitWidth == rhs.IndexBitWidth);
+bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
+  return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
+         ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
+         IndexBitWidth == Other.IndexBitWidth;
 }
 
-//===----------------------------------------------------------------------===//
-//                       DataLayout Class Implementation
-//===----------------------------------------------------------------------===//
-
 const char *DataLayout::getManglingComponent(const Triple &T) {
   if (T.isOSBinFormatGOFF())
     return "-m:l";
@@ -200,34 +167,34 @@ const char *DataLayout::getManglingComponent(const Triple &T) {
 
 // Default primitive type specifications.
 // NOTE: These arrays must be sorted by type bit width.
-constexpr LayoutAlignElem DefaultIntSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultIntSpecs[] = {
     {1, Align::Constant<1>(), Align::Constant<1>()},  // i1:8:8
     {8, Align::Constant<1>(), Align::Constant<1>()},  // i8:8:8
     {16, Align::Constant<2>(), Align::Constant<2>()}, // i16:16:16
     {32, Align::Constant<4>(), Align::Constant<4>()}, // i32:32:32
     {64, Align::Constant<4>(), Align::Constant<8>()}, // i64:32:64
 };
-constexpr LayoutAlignElem DefaultFloatSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultFloatSpecs[] = {
     {16, Align::Constant<2>(), Align::Constant<2>()},    // f16:16:16
     {32, Align::Constant<4>(), Align::Constant<4>()},    // f32:32:32
     {64, Align::Constant<8>(), Align::Constant<8>()},    // f64:64:64
     {128, Align::Constant<16>(), Align::Constant<16>()}, // f128:128:128
 };
-constexpr LayoutAlignElem DefaultVectorSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
     {64, Align::Constant<8>(), Align::Constant<8>()},    // v64:64:64
     {128, Align::Constant<16>(), Align::Constant<16>()}, // v128:128:128
 };
 
 // Default pointer type specifications.
-constexpr PointerAlignElem DefaultPointerSpecs[] = {
+constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
     {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64
 };
 
 DataLayout::DataLayout()
-    : IntAlignments(ArrayRef(DefaultIntSpecs)),
-      FloatAlignments(ArrayRef(DefaultFloatSpecs)),
-      VectorAlignments(ArrayRef(DefaultVectorSpecs)),
-      Pointers(ArrayRef(DefaultPointerSpecs)) {}
+    : IntSpecs(ArrayRef(DefaultIntSpecs)),
+      FloatSpecs(ArrayRef(DefaultFloatSpecs)),
+      VectorSpecs(ArrayRef(DefaultVectorSpecs)),
+      PointerSpecs(ArrayRef(DefaultPointerSpecs)) {}
 
 DataLayout::DataLayout(StringRef LayoutString) : DataLayout() {
   if (Error Err = parseSpecifier(LayoutString))
@@ -247,10 +214,10 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
   TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType;
   ManglingMode = Other.ManglingMode;
   LegalIntWidths = Other.LegalIntWidths;
-  IntAlignments = Other.IntAlignments;
-  FloatAlignments = Other.FloatAlignments;
-  VectorAlignments = Other.VectorAlignments;
-  Pointers = Other.Pointers;
+  IntSpecs = Other.IntSpecs;
+  FloatSpecs = Other.FloatSpecs;
+  VectorSpecs = Other.VectorSpecs;
+  PointerSpecs = Other.PointerSpecs;
   StructABIAlignment = Other.StructABIAlignment;
   StructPrefAlignment = Other.StructPrefAlignment;
   NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces;
@@ -268,11 +235,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
          FunctionPtrAlign == Other.FunctionPtrAlign &&
          TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
          ManglingMode == Other.ManglingMode &&
-         LegalIntWidths == Other.LegalIntWidths &&
-         IntAlignments == Other.IntAlignments &&
-         FloatAlignments == Other.FloatAlignments &&
-         VectorAlignments == Other.VectorAlignments &&
-         Pointers == Other.Pointers &&
+         LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
+         FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
+         PointerSpecs == Other.PointerSpecs &&
          StructABIAlignment == Other.StructABIAlignment &&
          StructPrefAlignment == Other.StructPrefAlignment;
 }
@@ -361,10 +326,10 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       continue;
     }
 
-    char Specifier = Tok.front();
+    char SpecifierChar = Tok.front();
     Tok = Tok.substr(1);
 
-    switch (Specifier) {
+    switch (SpecifierChar) {
     case 's':
       // Deprecated, but ignoring here to preserve loading older textual llvm
       // ASM file
@@ -433,9 +398,9 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
             return reportError("Invalid index size of 0 bytes");
         }
       }
-      if (Error Err = setPointerAlignmentInBits(
-              AddrSpace, assumeAligned(PointerABIAlign),
-              assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize))
+      if (Error Err = setPointerSpec(
+              AddrSpace, PointerMemSize, assumeAligned(PointerABIAlign),
+              assumeAligned(PointerPrefAlign), IndexSize))
         return Err;
       break;
     }
@@ -443,13 +408,22 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
     case 'v':
     case 'f':
     case 'a': {
-      AlignTypeEnum AlignType;
-      switch (Specifier) {
-      default: llvm_unreachable("Unexpected specifier!");
-      case 'i': AlignType = INTEGER_ALIGN; break;
-      case 'v': AlignType = VECTOR_ALIGN; break;
-      case 'f': AlignType = FLOAT_ALIGN; break;
-      case 'a': AlignType = AGGREGATE_ALIGN; break;
+      TypeSpecifier Specifier;
+      switch (SpecifierChar) {
+      default:
+        llvm_unreachable("Unexpected specifier!");
+      case 'i':
+        Specifier = TypeSpecifier::Integer;
+        break;
+      case 'v':
+        Specifier = TypeSpecifier::Vector;
+        break;
+      case 'f':
+        Specifier = TypeSpecifier::Float;
+        break;
+      case 'a':
+        Specifier = TypeSpecifier::Aggregate;
+        break;
       }
 
       // Bit size.
@@ -458,7 +432,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
         if (Error Err = getInt(Tok, Size))
           return Err;
 
-      if (AlignType == AGGREGATE_ALIGN && Size != 0)
+      if (Specifier == TypeSpecifier::Aggregate && Size != 0)
         return reportError(
             "Sized aggregate specification in datalayout string");
 
@@ -471,7 +445,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       unsigned ABIAlign;
       if (Error Err = getIntInBytes(Tok, ABIAlign))
         return Err;
-      if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
+      if (Specifier != TypeSpecifier::Aggregate && !ABIAlign)
         return reportError(
             "ABI alignment specification must be >0 for non-aggregate types");
 
@@ -479,7 +453,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
         return reportError("Invalid ABI alignment, must be a 16bit integer");
       if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign))
         return reportError("Invalid ABI alignment, must be a power of 2");
-      if (AlignType == INTEGER_ALIGN && Size == 8 && ABIAlign != 1)
+      if (Specifier == TypeSpecifier::Integer && Size == 8 && ABIAlign != 1)
         return reportError(
             "Invalid ABI alignment, i8 must be naturally aligned");
 
@@ -498,8 +472,8 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign))
         return reportError("Invalid preferred alignment, must be a power of 2");
 
-      if (Error Err = setAlignment(AlignType, assumeAligned(ABIAlign),
-                                   assumeAligned(PrefAlign), Size))
+      if (Error Err = setPrimitiveSpec(Specifier, Size, assumeAligned(ABIAlign),
+                                       assumeAligned(PrefAlign)))
         return Err;
 
       break;
@@ -607,16 +581,17 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
   return Error::success();
 }
 
-static SmallVectorImpl<LayoutAlignElem>::const_iterator
-findAlignmentLowerBound(const SmallVectorImpl<LayoutAlignElem> &Alignments,
-                        uint32_t BitWidth) {
-  return partition_point(Alignments, [BitWidth](const LayoutAlignElem &E) {
-    return E.TypeBitWidth < BitWidth;
+static SmallVectorImpl<DataLayout::PrimitiveSpec>::const_iterator
+findPrimitiveSpecLowerBound(
+    const SmallVectorImpl<DataLayout::PrimitiveSpec> &Specs,
+    uint32_t BitWidth) {
+  return partition_point(Specs, [BitWidth](const DataLayout::PrimitiveSpec &E) {
+    return E.BitWidth < BitWidth;
   });
 }
 
-Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign,
-                               Align PrefAlign, uint32_t BitWidth) {
+Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth,
+                                   Align ABIAlign, Align PrefAlign) {
   // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as
   // uint16_t, it is unclear if there are requirements for alignment to be less
   // than 2^16 other than storage. In the meantime we leave the restriction as
@@ -628,74 +603,72 @@ Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign,
     return reportError(
         "Preferred alignment cannot be less than the ABI alignment");
 
-  SmallVectorImpl<LayoutAlignElem> *Alignments;
-  switch (AlignType) {
-  case AGGREGATE_ALIGN:
+  SmallVectorImpl<PrimitiveSpec> *Specs;
+  switch (Specifier) {
+  case TypeSpecifier::Aggregate:
     StructABIAlignment = ABIAlign;
     StructPrefAlignment = PrefAlign;
     return Error::success();
-  case INTEGER_ALIGN:
-    Alignments = &IntAlignments;
+  case TypeSpecifier::Integer:
+    Specs = &IntSpecs;
     break;
-  case FLOAT_ALIGN:
-    Alignments = &FloatAlignments;
+  case TypeSpecifier::Float:
+    Specs = &FloatSpecs;
     break;
-  case VECTOR_ALIGN:
-    Alignments = &VectorAlignments;
+  case TypeSpecifier::Vector:
+    Specs = &VectorSpecs;
     break;
   }
 
-  auto I = partition_point(*Alignments, [BitWidth](const LayoutAlignElem &E) {
-    return E.TypeBitWidth < BitWidth;
+  auto I = partition_point(*Specs, [BitWidth](const PrimitiveSpec &E) {
+    return E.BitWidth < BitWidth;
   });
-  if (I != Alignments->end() && I->TypeBitWidth == BitWidth) {
+  if (I != Specs->end() && I->BitWidth == BitWidth) {
     // Update the abi, preferred alignments.
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
   } else {
     // Insert before I to keep the vector sorted.
-    Alignments->insert(I, LayoutAlignElem::get(ABIAlign, PrefAlign, BitWidth));
+    Specs->insert(I, PrimitiveSpec{BitWidth, ABIAlign, PrefAlign});
   }
   return Error::success();
 }
 
-const PointerAlignElem &
-DataLayout::getPointerAlignElem(uint32_t AddressSpace) const {
-  if (AddressSpace != 0) {
-    auto I = lower_bound(Pointers, AddressSpace,
-                         [](const PointerAlignElem &A, uint32_t AddressSpace) {
-      return A.AddressSpace < AddressSpace;
-    });
-    if (I != Pointers.end() && I->AddressSpace == AddressSpace)
+const DataLayout::PointerSpec &
+DataLayout::getPointerSpec(uint32_t AddrSpace) const {
+  if (AddrSpace != 0) {
+    auto I = lower_bound(PointerSpecs, AddrSpace,
+                         [](const PointerSpec &Spec, uint32_t AddrSpace) {
+                           return Spec.AddrSpace < AddrSpace;
+                         });
+    if (I != PointerSpecs.end() && I->AddrSpace == AddrSpace)
       return *I;
   }
 
-  assert(Pointers[0].AddressSpace == 0);
-  return Pointers[0];
+  assert(PointerSpecs[0].AddrSpace == 0);
+  return PointerSpecs[0];
 }
 
-Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
-                                            Align PrefAlign,
-                                            uint32_t TypeBitWidth,
-                                            uint32_t IndexBitWidth) {
+Error DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
+                                 Align ABIAlign, Align PrefAlign,
+                                 uint32_t IndexBitWidth) {
   if (PrefAlign < ABIAlign)
     return reportError(
         "Preferred alignment cannot be less than the ABI alignment");
-  if (IndexBitWidth > TypeBitWidth)
+  if (IndexBitWidth > BitWidth)
     return reportError("Index width cannot be larger than pointer width");
 
-  auto I = lower_bound(Pointers, AddrSpace,
-                       [](const PointerAlignElem &A, uint32_t AddressSpace) {
-    return A.AddressSpace < AddressSpace;
-  });
-  if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
-    Pointers.insert(I,
-                    PointerAlignElem::getInBits(AddrSpace, ABIAlign, PrefAlign,
-                                                TypeBitWidth, IndexBitWidth));
+  auto I = lower_bound(PointerSpecs, AddrSpace,
+                       [](const PointerSpec &A, uint32_t AddrSpace) {
+                         return A.AddrSpace < AddrSpace;
+                       });
+  if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
+    PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
+                                       IndexBitWidth});
   } else {
+    I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
-    I->TypeBitWidth = TypeBitWidth;
     I->IndexBitWidth = IndexBitWidth;
   }
   return Error::success();
@@ -703,11 +676,11 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
 
 Align DataLayout::getIntegerAlignment(uint32_t BitWidth,
                                       bool abi_or_pref) const {
-  auto I = findAlignmentLowerBound(IntAlignments, BitWidth);
+  auto I = findPrimitiveSpecLowerBound(IntSpecs, BitWidth);
   // If we don't have an exact match, use alignment of next larger integer
   // type. If there is none, use alignment of largest integer type by going
   // back one element.
-  if (I == IntAlignments.end())
+  if (I == IntSpecs.end())
     --I;
   return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 }
@@ -737,22 +710,22 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
 }
 
 Align DataLayout::getPointerABIAlignment(unsigned AS) const {
-  return getPointerAlignElem(AS).ABIAlign;
+  return getPointerSpec(AS).ABIAlign;
 }
 
 Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
-  return getPointerAlignElem(AS).PrefAlign;
+  return getPointerSpec(AS).PrefAlign;
 }
 
 unsigned DataLayout::getPointerSize(unsigned AS) const {
-  return divideCeil(getPointerAlignElem(AS).TypeBitWidth, 8);
+  return divideCeil(getPointerSpec(AS).BitWidth, 8);
 }
 
 unsigned DataLayout::getMaxIndexSize() const {
   unsigned MaxIndexSize = 0;
-  for (auto &P : Pointers)
+  for (const PointerSpec &Spec : PointerSpecs)
     MaxIndexSize =
-        std::max(MaxIndexSize, (unsigned)divideCeil(P.TypeBitWidth, 8));
+        std::max(MaxIndexSize, (unsigned)divideCeil(Spec.BitWidth, 8));
 
   return MaxIndexSize;
 }
@@ -765,7 +738,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
 }
 
 unsigned DataLayout::getIndexSize(unsigned AS) const {
-  return divideCeil(getPointerAlignElem(AS).IndexBitWidth, 8);
+  return divideCeil(getPointerSpec(AS).IndexBitWidth, 8);
 }
 
 unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
@@ -819,8 +792,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
   case Type::FP128TyID:
   case Type::X86_FP80TyID: {
     unsigned BitWidth = getTypeSizeInBits(Ty).getFixedValue();
-    auto I = findAlignmentLowerBound(FloatAlignments, BitWidth);
-    if (I != FloatAlignments.end() && I->TypeBitWidth == BitWidth)
+    auto I = findPrimitiveSpecLowerBound(FloatSpecs, BitWidth);
+    if (I != FloatSpecs.end() && I->BitWidth == BitWidth)
       return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 
     // If we still couldn't find a reasonable default alignment, fall back
@@ -834,8 +807,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
   case Type::FixedVectorTyID:
   case Type::ScalableVectorTyID: {
     unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue();
-    auto I = findAlignmentLowerBound(VectorAlignments, BitWidth);
-    if (I != VectorAlignments.end() && I->TypeBitWidth == BitWidth)
+    auto I = findPrimitiveSpecLowerBound(VectorSpecs, BitWidth);
+    if (I != VectorSpecs.end() && I->BitWidth == BitWidth)
       return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 
     // By default, use natural alignment for vector types. This is consistent
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6927a2886b962b..0201e69f3e216a 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -1196,6 +1197,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
     // In pre-link, we just want the instrumented IR. We use the contextual
     // profile in the post-thinlink phase.
     // The instrumentation will be removed in post-thinlink after IPO.
+    // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
+    // mechanism for GUIDs.
+    MPM.addPass(AssignGUIDPass());
     if (IsCtxProfUse)
       return MPM;
     addPostPGOLoopRotation(MPM, Level);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 4fdded7b82f36b..18f4aa19224da0 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -46,6 +46,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
 #endif
 MODULE_PASS("always-inline", AlwaysInlinerPass())
 MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
+MODULE_PASS("assign-guid", AssignGUIDPass())
 MODULE_PASS("attributor", AttributorPass())
 MODULE_PASS("attributor-light", AttributorLightPass())
 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 626321f44c2bfc..e63633b8a1e1ab 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -252,7 +252,6 @@ static bool expandNormalizeIntrinsic(CallInst *Orig) {
     return true;
   }
 
-  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
   unsigned XVecSize = XVec->getNumElements();
   Value *DotProduct = nullptr;
   // use the dot intrinsic corresponding to the vector size
diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
index 246e32c264dc98..1594fa533379b6 100644
--- a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -163,8 +163,8 @@ struct OpCodeProperty {
   llvm::SmallVector<OpOverload> Overloads;
   llvm::SmallVector<OpStage> Stages;
   llvm::SmallVector<OpAttribute> Attributes;
-  int OverloadParamIndex;        // parameter index which control the overload.
-                                 // When < 0, should be only 1 overload type.
+  int OverloadParamIndex; // parameter index which controls the overload.
+                          // When < 0, should be only 1 overload type.
 };
 
 // Include getOpCodeClassName getOpCodeProperty, getOpCodeName and
diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp b/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp
index 33e0119807bb8e..d423220bb902ed 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp
+++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp
@@ -27,13 +27,6 @@ dxil::Resources DXILResourceMDAnalysis::run(Module &M,
 
 AnalysisKey DXILResourceMDAnalysis::Key;
 
-PreservedAnalyses DXILResourceMDPrinterPass::run(Module &M,
-                                                 ModuleAnalysisManager &AM) {
-  dxil::Resources Res = AM.getResult<DXILResourceMDAnalysis>(M);
-  Res.print(OS);
-  return PreservedAnalyses::all();
-}
-
 char DXILResourceMDWrapper::ID = 0;
 INITIALIZE_PASS_BEGIN(DXILResourceMDWrapper, DEBUG_TYPE,
                       "DXIL resource Information", true, true)
@@ -46,7 +39,3 @@ bool DXILResourceMDWrapper::runOnModule(Module &M) {
 }
 
 DXILResourceMDWrapper::DXILResourceMDWrapper() : ModulePass(ID) {}
-
-void DXILResourceMDWrapper::print(raw_ostream &OS, const Module *) const {
-  Resources.print(OS);
-}
diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
index 26d9237d51b49c..0ad97dc1992f4c 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
+++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
@@ -30,17 +30,6 @@ class DXILResourceMDAnalysis
   dxil::Resources run(Module &M, ModuleAnalysisManager &AM);
 };
 
-/// Printer pass for the \c DXILResourceMDAnalysis results.
-class DXILResourceMDPrinterPass
-    : public PassInfoMixin<DXILResourceMDPrinterPass> {
-  raw_ostream &OS;
-
-public:
-  explicit DXILResourceMDPrinterPass(raw_ostream &OS) : OS(OS) {}
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-  static bool isRequired() { return true; }
-};
-
 /// The legacy pass manager's analysis pass to compute DXIL resource
 /// information.
 class DXILResourceMDWrapper : public ModulePass {
@@ -60,8 +49,6 @@ class DXILResourceMDWrapper : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }
-
-  void print(raw_ostream &O, const Module *M = nullptr) const override;
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b14deb479bf6f..911fa45d7173e8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1428,6 +1428,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   // Disable strict node mutation.
   IsStrictFPEnabled = true;
+  EnableExtLdPromotion = true;
 
   // Let the subtarget decide if a predictable select is more expensive than the
   // corresponding branch. This information is used in CGP/SelectOpt to decide
@@ -13843,8 +13844,10 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
 
   // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
+  // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
   if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
-      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
+      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16) &&
+      Subtarget.hasStdExtZfhmin())
     return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
                        Src.getOperand(0));
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 85715ca9145c35..abdd366741eb04 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -458,7 +458,6 @@ def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>;
 // Moves (no conversion)
 def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
 def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
-def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
 
 def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxmin]
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 85683c62064435..781e3d7929aa43 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2000,3 +2000,35 @@ bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
   // target-features.
   return (CallerBits & CalleeBits) == CalleeBits;
 }
+
+/// See if \p I should be considered for address type promotion. We check if \p
+/// I is a sext with the right type and used in memory accesses. If it is used
+/// in a "complex" getelementptr, we allow it to be promoted without finding
+/// other sext instructions that sign extended the same initial value. A
+/// getelementptr is considered "complex" if it has more than 2 operands.
+bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+  bool Considerable = false;
+  AllowPromotionWithoutCommonHeader = false;
+  if (!isa<SExtInst>(&I))
+    return false;
+  Type *ConsideredSExtType =
+      Type::getInt64Ty(I.getParent()->getParent()->getContext());
+  if (I.getType() != ConsideredSExtType)
+    return false;
+  // See if the sext is the one with the right type and used in at least one
+  // GetElementPtrInst.
+  for (const User *U : I.users()) {
+    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+      Considerable = true;
+      // A getelementptr is considered as "complex" if it has more than 2
+      // operands. We will promote a SExt used in such complex GEP as we
+      // expect some computation to be merged if they are done on 64 bits.
+      if (GEPInst->getNumOperands() > 2) {
+        AllowPromotionWithoutCommonHeader = true;
+        break;
+      }
+    }
+  }
+  return Considerable;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d04..f5eca2839acd05 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,7 +397,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldFoldTerminatingConditionAfterLSR() const {
     return true;
   }
-
+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader);
   std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 21c1556d1d8ed2..ad1f2dc532d1c2 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -30,7 +30,6 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
                                          raw_ostream &O) {
   int64_t Imm = MI->getOperand(Op).getImm();
   unsigned Opc = MI->getOpcode();
-  bool IsCMPCCXADD = X86::isCMPCCXADD(Opc);
   bool IsCCMPOrCTEST = X86::isCCMPCC(Opc) || X86::isCTESTCC(Opc);
 
   // clang-format off
@@ -39,19 +38,19 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
   case    0: O << "o";  break;
   case    1: O << "no"; break;
   case    2: O << "b";  break;
-  case    3: O << (IsCMPCCXADD ? "nb" : "ae"); break;
-  case    4: O << (IsCMPCCXADD ?  "z" :  "e"); break;
-  case    5: O << (IsCMPCCXADD ? "nz" : "ne"); break;
+  case    3: O << "ae"; break;
+  case    4: O << "e";  break;
+  case    5: O << "ne"; break;
   case    6: O << "be"; break;
-  case    7: O << (IsCMPCCXADD ? "nbe" : "a"); break;
+  case    7: O << "a";  break;
   case    8: O << "s";  break;
   case    9: O << "ns"; break;
   case  0xa: O << (IsCCMPOrCTEST ? "t" : "p");  break;
   case  0xb: O << (IsCCMPOrCTEST ? "f" : "np"); break;
   case  0xc: O << "l";  break;
-  case  0xd: O << (IsCMPCCXADD ? "nl" : "ge"); break;
+  case  0xd: O << "ge"; break;
   case  0xe: O << "le"; break;
-  case  0xf: O << (IsCMPCCXADD ? "nle" : "g"); break;
+  case  0xf: O << "g";  break;
   }
   // clang-format on
 }
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index d6ba12465bb328..9b10cbba84075a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -8,6 +8,7 @@
 //
 
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Analysis.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -16,6 +17,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
 #include <utility>
 
@@ -223,8 +225,8 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
       assert(Mark->getIndex()->isZero());
 
       IRBuilder<> Builder(Mark);
-      // FIXME(mtrofin): use InstrProfSymtab::getCanonicalName
-      Guid = Builder.getInt64(F.getGUID());
+
+      Guid = Builder.getInt64(AssignGUIDPass::getGUID(F));
       // The type of the context of this function is now knowable since we have
       // NrCallsites and NrCounters. We delcare it here because it's more
       // convenient - we have the Builder.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f1bb96a38cfaa9..fdf8f7042c4fb8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9363,46 +9363,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   State.set(this, Res, 0);
 }
 
-void VPWidenStoreRecipe::execute(VPTransformState &State) {
-  auto *SI = cast<StoreInst>(&Ingredient);
-
-  VPValue *StoredVPValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
-  const Align Alignment = getLoadStoreAlignment(&Ingredient);
-
-  auto &Builder = State.Builder;
-  State.setDebugLocFrom(getDebugLoc());
-
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Instruction *NewSI = nullptr;
-    Value *Mask = nullptr;
-    if (auto *VPMask = getMask()) {
-      // Mask reversal is only needed for non-all-one (null) masks, as reverse
-      // of a null all-one mask is a null mask.
-      Mask = State.get(VPMask, Part);
-      if (isReverse())
-        Mask = Builder.CreateVectorReverse(Mask, "reverse");
-    }
-
-    Value *StoredVal = State.get(StoredVPValue, Part);
-    if (isReverse()) {
-      // If we store to reverse consecutive memory locations, then we need
-      // to reverse the order of elements in the stored value.
-      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-      // We don't want to update the value in the map as it might be used in
-      // another expression. So don't call resetVectorValue(StoredVal).
-    }
-    Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter);
-    if (CreateScatter)
-      NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
-    else if (Mask)
-      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
-    else
-      NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
-    State.addMetadata(NewSI, SI);
-  }
-}
-
 void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
                           "explicit vector length.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 911b2fe9e9a1eb..bc57ea4d52471e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2066,7 +2066,49 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
   O << " = vp.load ";
   printOperands(O, SlotTracker);
 }
+#endif
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredVPValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
 
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for non-all-one (null) masks, as reverse
+      // of a null all-one mask is a null mask.
+      Mask = State.get(VPMask, Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+    }
+
+    Value *StoredVal = State.get(StoredVPValue, Part);
+    if (isReverse()) {
+      // If we store to reverse consecutive memory locations, then we need
+      // to reverse the order of elements in the stored value.
+      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+      // We don't want to update the value in the map as it might be used in
+      // another expression. So don't call resetVectorValue(StoredVal).
+    }
+    Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter);
+    if (CreateScatter)
+      NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
+    else if (Mask)
+      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    else
+      NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+    State.addMetadata(NewSI, SI);
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN store ";
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
new file mode 100644
index 00000000000000..0cdf82bd96efcb
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -0,0 +1,119 @@
+; REQUIRES: x86_64-linux
+;
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+;
+; Test that the GUID metadata survives through thinlink.
+;
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+;
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m1.bc %t/m1.ll
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m2.bc %t/m2.ll
+;
+; RUN: rm -rf %t/postlink
+; RUN: mkdir %t/postlink
+;
+;
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc -o %t/ -thinlto-distributed-indexes \
+; RUN:  -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:  -r %t/m1.bc,f1,plx \
+; RUN:  -r %t/m2.bc,f1 \
+; RUN:  -r %t/m2.bc,entrypoint,plx
+; RUN: opt --passes='function-import,require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
+; RUN:  -summary-file=%t/m2.bc.thinlto.bc -use-ctx-profile=%t/profile.ctxprofdata %t/m2.bc \
+; RUN:  -S -o %t/m2.post.ll 2> %t/profile.txt
+; RUN: diff %t/expected.txt %t/profile.txt
+;--- m1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+source_filename = "random_path/m1.cc"
+
+define private void @f2() #0 !guid !0 {
+  ret void
+}
+
+define void @f1() #0 {
+  call void @f2()
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 3087265239403591524 }
+
+;--- m2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+source_filename = "random_path/m2.cc"
+
+declare void @f1()
+
+define void @entrypoint() {
+  call void @f1()
+  ret void
+}
+;--- profile.json
+[
+  {
+    "Callsites": [
+      [
+        {
+          "Callsites": [
+            [
+              {
+                "Counters": [
+                  10
+                ],
+                "Guid": 3087265239403591524
+              }
+            ]
+          ],
+          "Counters": [
+            7
+          ],
+          "Guid": 2072045998141807037
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 10507721908651011566
+  }
+]
+;--- expected.txt
+Function Info:
+10507721908651011566 : entrypoint. MaxCounterID: 1. MaxCallsiteID: 1
+3087265239403591524 : f2.llvm.0. MaxCounterID: 1. MaxCallsiteID: 0
+2072045998141807037 : f1. MaxCounterID: 1. MaxCallsiteID: 1
+
+Current Profile:
+[
+  {
+    "Callsites": [
+      [
+        {
+          "Callsites": [
+            [
+              {
+                "Counters": [
+                  10
+                ],
+                "Guid": 3087265239403591524
+              }
+            ]
+          ],
+          "Counters": [
+            7
+          ],
+          "Guid": 2072045998141807037
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 10507721908651011566
+  }
+]
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index 9cd78cfef187ba..69806e334aaec9 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -1,16 +1,22 @@
 ; REQUIRES: x86_64-linux
-
+;
+; RUN: rm -rf %t
 ; RUN: split-file %s %t
 ; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
 ; RUN: not opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
+; RUN:   %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
 
 ; RUN: not opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   -use-ctx-profile=does_not_exist.ctxprofdata %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
+; RUN:   -use-ctx-profile=does_not_exist.ctxprofdata %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
 
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' \
+; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/example.ll -S -o %t/prelink.ll
+
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:  %t/example.ll -S -o %t/prelink.ll
 ; RUN: opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/empty.ll -S 2> %t/output.json
-; RUN: diff %t/profile.json %t/output.json
+; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S 2> %t/output.txt
+; RUN: diff %t/expected-profile-output.txt %t/output.txt
 
 ; NO-FILE: error: could not open contextual profile file
 ;
@@ -18,41 +24,104 @@
 ; output it from opt.
 ;--- profile.json
 [
+  {
+    "Counters": [
+      9
+    ],
+    "Guid": 12341
+  },
+  {
+    "Counters": [
+      5
+    ],
+    "Guid": 12074870348631550642
+  },
   {
     "Callsites": [
-      [],
       [
         {
           "Counters": [
-            4,
-            5
+            6,
+            7
           ],
-          "Guid": 2000
-        },
+          "Guid": 728453322856651412
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 11872291593386833696
+  }
+]
+;--- expected-profile-output.txt
+Function Info:
+4909520559318251808 : an_entrypoint. MaxCounterID: 2. MaxCallsiteID: 1
+12074870348631550642 : another_entrypoint_no_callees. MaxCounterID: 1. MaxCallsiteID: 0
+11872291593386833696 : foo. MaxCounterID: 1. MaxCallsiteID: 1
+
+Current Profile:
+[
+  {
+    "Callsites": [
+      [
         {
           "Counters": [
             6,
-            7,
-            8
+            7
           ],
-          "Guid": 18446744073709551613
+          "Guid": 728453322856651412
         }
       ]
     ],
     "Counters": [
-      1,
-      2,
-      3
+      1
     ],
-    "Guid": 1000
+    "Guid": 11872291593386833696
   },
   {
     "Counters": [
-      5,
-      9,
-      10
+      5
     ],
-    "Guid": 18446744073709551612
+    "Guid": 12074870348631550642
   }
 ]
-;--- empty.ll
+;--- example.ll
+declare void @bar()
+
+define private void @foo(i32 %a, ptr %fct) #0 !guid !0 {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}
+
+define void @an_entrypoint(i32 %a) {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+
+yes:
+  call void @foo(i32 1, ptr null)
+  ret void
+no:
+  ret void
+}
+
+define void @another_entrypoint_no_callees(i32 %a) {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+
+yes:
+  ret void
+no:
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 11872291593386833696 }
\ No newline at end of file
diff --git a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll b/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll
deleted file mode 100644
index 7c1e070c292d18..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Alignment is neither 0 nor a power of 2
-
-target datalayout = "Fi24"
diff --git a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll b/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll
deleted file mode 100644
index e12cfce0309746..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Invalid ABI alignment, i8 must be naturally aligned
-
-target datalayout = "i8:16"
diff --git a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll b/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll
deleted file mode 100644
index 1ccfb7832a50cc..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Alignment is neither 0 nor a power of 2
-
-target datalayout = "S24"
diff --git a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll
deleted file mode 100644
index f0407da73e4fc2..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "A16777216"
-; CHECK: Invalid address space, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll
deleted file mode 100644
index 19bf77db329d2c..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-; CHECK: Invalid address space, must be a 24-bit integer
-target datalayout = "G16777216"
diff --git a/llvm/test/Assembler/invalid-datalayout-index-size.ll b/llvm/test/Assembler/invalid-datalayout-index-size.ll
deleted file mode 100644
index dc608cdd56a040..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-index-size.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:64:64:64:128"
-; CHECK: Index width cannot be larger than pointer width
diff --git a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll
deleted file mode 100644
index e636b75dee4d04..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-; CHECK: Invalid address space, must be a 24-bit integer
-target datalayout = "P16777216"
diff --git a/llvm/test/Assembler/invalid-datalayout1.ll b/llvm/test/Assembler/invalid-datalayout1.ll
deleted file mode 100644
index d1befdcdf294d5..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout1.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "^"
-; CHECK: Unknown specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout10.ll b/llvm/test/Assembler/invalid-datalayout10.ll
deleted file mode 100644
index 9f19688f852b4a..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout10.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m"
-; CHECK: Expected mangling specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout11.ll b/llvm/test/Assembler/invalid-datalayout11.ll
deleted file mode 100644
index f8fed8ff9ff339..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout11.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m."
-; CHECK: Unexpected trailing characters after mangling specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout12.ll b/llvm/test/Assembler/invalid-datalayout12.ll
deleted file mode 100644
index d79c196baab16f..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout12.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "f"
-; CHECK: Missing alignment specification in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout13.ll b/llvm/test/Assembler/invalid-datalayout13.ll
deleted file mode 100644
index 5ac719dbb7a9c0..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout13.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = ":32"
-; CHECK: Expected token before separator in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout14.ll b/llvm/test/Assembler/invalid-datalayout14.ll
deleted file mode 100644
index 84634b52a146ca..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout14.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:64:16"
-; CHECK: Preferred alignment cannot be less than the ABI alignment
diff --git a/llvm/test/Assembler/invalid-datalayout15.ll b/llvm/test/Assembler/invalid-datalayout15.ll
deleted file mode 100644
index ea240b73fd25f2..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout15.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:16:16777216"
-; CHECK: Invalid preferred alignment, must be a 16bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout16.ll b/llvm/test/Assembler/invalid-datalayout16.ll
deleted file mode 100644
index 0dd1abb629b6fc..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout16.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:16777216:16777216"
-; CHECK: Invalid ABI alignment, must be a 16bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout17.ll b/llvm/test/Assembler/invalid-datalayout17.ll
deleted file mode 100644
index b7eab74ad2a8ca..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout17.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i16777216:16:16"
-; CHECK: Invalid bit width, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout18.ll b/llvm/test/Assembler/invalid-datalayout18.ll
deleted file mode 100644
index b9956f98c9c6dc..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout18.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:32:32:16"
-; CHECK: Preferred alignment cannot be less than the ABI alignment
diff --git a/llvm/test/Assembler/invalid-datalayout19.ll b/llvm/test/Assembler/invalid-datalayout19.ll
deleted file mode 100644
index fc0fc468520928..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout19.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:0:32:32"
-
-; CHECK: Invalid pointer size of 0 bytes
-
diff --git a/llvm/test/Assembler/invalid-datalayout2.ll b/llvm/test/Assembler/invalid-datalayout2.ll
deleted file mode 100644
index a435612bf85459..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout2.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m:v"
-; CHECK: Unknown mangling in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout20.ll b/llvm/test/Assembler/invalid-datalayout20.ll
deleted file mode 100644
index a9ac1d7fe0983a..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout20.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:64:24:64"
-
-; CHECK: Pointer ABI alignment must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout21.ll b/llvm/test/Assembler/invalid-datalayout21.ll
deleted file mode 100644
index a39d1d7a14a86b..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout21.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:64:64:24"
-
-; CHECK: Pointer preferred alignment must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout22.ll b/llvm/test/Assembler/invalid-datalayout22.ll
deleted file mode 100644
index 14e4c2822ce4b0..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout22.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "v128:0:128"
-
-; CHECK: ABI alignment specification must be >0 for non-aggregate types
-
diff --git a/llvm/test/Assembler/invalid-datalayout23.ll b/llvm/test/Assembler/invalid-datalayout23.ll
deleted file mode 100644
index 430326327bc116..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout23.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "i32:24:32"
-
-; CHECK: Invalid ABI alignment, must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout24.ll b/llvm/test/Assembler/invalid-datalayout24.ll
deleted file mode 100644
index 616ec64518a5b9..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout24.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "i32:32:24"
-
-; CHECK: Invalid preferred alignment, must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout3.ll b/llvm/test/Assembler/invalid-datalayout3.ll
deleted file mode 100644
index 44535fd055b5ea..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout3.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "n0"
-; CHECK: Zero width native integer type in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout4.ll b/llvm/test/Assembler/invalid-datalayout4.ll
deleted file mode 100644
index 99a6a6093954e1..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout4.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p16777216:64:64:64"
-; CHECK: Invalid address space, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout5.ll b/llvm/test/Assembler/invalid-datalayout5.ll
deleted file mode 100644
index 3ce8791c0870b4..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout5.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "a1:64"
-; CHECK: Sized aggregate specification in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout6.ll b/llvm/test/Assembler/invalid-datalayout6.ll
deleted file mode 100644
index 425099f7cad869..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout6.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "a:"
-; CHECK: Trailing separator in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout7.ll b/llvm/test/Assembler/invalid-datalayout7.ll
deleted file mode 100644
index 5e010710889f6d..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout7.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:48:52"
-; CHECK: number of bits must be a byte width multiple
diff --git a/llvm/test/Assembler/invalid-datalayout8.ll b/llvm/test/Assembler/invalid-datalayout8.ll
deleted file mode 100644
index 28832ffb17dd05..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout8.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "e-p"
-; CHECK: Missing size specification for pointer in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout9.ll b/llvm/test/Assembler/invalid-datalayout9.ll
deleted file mode 100644
index dfeac65cf604d1..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout9.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "e-p:64"
-; CHECK: Missing alignment specification for pointer in datalayout string
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll
index ca4def78e73ded..1fad869ab43057 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandle.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll
@@ -17,10 +17,12 @@ define void @test_buffers() {
   ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 6, i1 false)
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
+  ; Buffer<uint4> typed2 = Buf[4]
+  ; Note that the index below is 3 + 4 = 7
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t(
-          i32 2, i32 7, i32 24, i32 8, i1 false)
-  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 8, i1 false)
+          i32 5, i32 3, i32 24, i32 7, i1 false)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 7, i1 false)
 
   ; struct S { float4 a; uint4 b; };
   ; StructuredBuffer<S> Buf : register(t2, space4)
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
index 9b6688cb11d54d..e8bd8fe89132d9 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
@@ -18,10 +18,12 @@ define void @test_bindings() {
   ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF1]], %dx.types.ResourceProperties { i32 4106, i32 260 })
 
   ; Buffer<uint4> Buf[24] : register(t3, space5)
+  ; Buffer<uint4> typed2 = Buf[4]
+  ; Note that the index below is 3 + 4 = 7
   %typed2 = call target("dx.TypedBuffer", <4 x i32>, 0, 0, 0)
       @llvm.dx.handle.fromBinding.tdx.TypedBuffer_i32_0_0_0t(
-          i32 2, i32 7, i32 24, i32 8, i1 false)
-  ; CHECK: [[BUF2:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 7, i32 30, i32 2, i8 0 }, i32 8, i1 false)
+          i32 5, i32 3, i32 24, i32 7, i1 false)
+  ; CHECK: [[BUF2:%[0-9]*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 218, %dx.types.ResBind { i32 3, i32 26, i32 5, i8 0 }, i32 7, i1 false)
   ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BUF2]], %dx.types.ResourceProperties { i32 10, i32 1029 })
 
   ; struct S { float4 a; uint4 b; };
diff --git a/llvm/test/CodeGen/DirectX/normalize.ll b/llvm/test/CodeGen/DirectX/normalize.ll
index f3533cc56e7c25..e2c8a5d4656a65 100644
--- a/llvm/test/CodeGen/DirectX/normalize.ll
+++ b/llvm/test/CodeGen/DirectX/normalize.ll
@@ -22,7 +22,6 @@ entry:
 
 define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
   ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]])
@@ -37,7 +36,6 @@ entry:
 
 define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
   ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]])
@@ -52,7 +50,6 @@ entry:
 
 define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
   ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]])
@@ -74,7 +71,6 @@ entry:
 
 define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
   ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]])
@@ -89,7 +85,6 @@ entry:
 
 define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
   ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]])
@@ -104,7 +99,6 @@ entry:
 
 define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]])
diff --git a/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..b4fdd954b856c8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,431 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D
+; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
diff --git a/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..bc81966ca0f5c9
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=mipsisa32r6 < %s | FileCheck %s --check-prefix=MIPS32R6
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f1, $f0
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f1, $f0
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f12, $f14
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f1, $f0
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f1, $f0
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f12, $f14
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f1, $f0
+  %z = call float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f1, $f0
+  %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f12, $f14
+  %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f1, $f0
+  %z = call double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f1, $f0
+  %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f12, $f14
+  %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 21bf6618c52a26..805ddee4ac3f6f 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -336,17 +336,23 @@ start:
 }
 declare i32 @llvm.fptoui.sat.i32.f32(float)
 
-define i32 @fmv_x_w(float %a, float %b) nounwind {
+define signext i32 @fmv_x_w(float %a, float %b) nounwind {
 ; CHECKIF-LABEL: fmv_x_w:
 ; CHECKIF:       # %bb.0:
 ; CHECKIF-NEXT:    fadd.s fa5, fa0, fa1
 ; CHECKIF-NEXT:    fmv.x.w a0, fa5
 ; CHECKIF-NEXT:    ret
 ;
-; CHECKIZFINX-LABEL: fmv_x_w:
-; CHECKIZFINX:       # %bb.0:
-; CHECKIZFINX-NEXT:    fadd.s a0, a0, a1
-; CHECKIZFINX-NEXT:    ret
+; RV32IZFINX-LABEL: fmv_x_w:
+; RV32IZFINX:       # %bb.0:
+; RV32IZFINX-NEXT:    fadd.s a0, a0, a1
+; RV32IZFINX-NEXT:    ret
+;
+; RV64IZFINX-LABEL: fmv_x_w:
+; RV64IZFINX:       # %bb.0:
+; RV64IZFINX-NEXT:    fadd.s a0, a0, a1
+; RV64IZFINX-NEXT:    sext.w a0, a0
+; RV64IZFINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fmv_x_w:
 ; RV32I:       # %bb.0:
@@ -362,6 +368,7 @@ define i32 @fmv_x_w(float %a, float %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll
new file mode 100644
index 00000000000000..b733c6a1c787ba
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -S | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "riscv64"
+
+%struct.match_state = type { i64, i64  }
+
+; The sext of %add is promoted through the add: %i reuses the existing sext (%s2) and an extra sext is forked for %j.
+define void @promoteTwoOne(i32 %i, i32 %j, ptr %P1, ptr %P2 ) {
+; CHECK-LABEL: define void @promoteTwoOne(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S2:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[PROMOTED2:%.*]] = sext i32 [[J]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[S2]], [[PROMOTED2]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
+; CHECK-NEXT:    store i64 [[S]], ptr [[ADDR1]], align 8
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
+; CHECK-NEXT:    store i64 [[S2]], ptr [[ADDR2]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add = add nsw i32 %i, %j
+  %s = sext i32 %add to i64
+  %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
+  store i64 %s, ptr %addr1
+  %s2 = sext i32 %i to i64
+  %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
+  store i64 %s2, ptr %addr2
+  ret void
+}
+
+; Both %add1 and %add2 are promoted by forking extra sexts; the forked sext of the shared operand %j is reused by both widened adds.
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, ptr %P1, ptr %P2) {
+; CHECK-LABEL: define void @promoteTwoTwo(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], i32 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PROMOTED3:%.*]] = sext i32 [[J]] to i64
+; CHECK-NEXT:    [[PROMOTED4:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED4]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
+; CHECK-NEXT:    store i64 [[S]], ptr [[ADDR1]], align 8
+; CHECK-NEXT:    [[PROMOTED2:%.*]] = sext i32 [[K]] to i64
+; CHECK-NEXT:    [[S2:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]]
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
+; CHECK-NEXT:    store i64 [[S2]], ptr [[ADDR2]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add1 = add nsw i32 %j, %i
+  %s = sext i32 %add1 to i64
+  %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
+  store i64 %s, ptr %addr1
+  %add2 = add nsw i32 %j, %k
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
+  store i64 %s2, ptr %addr2
+  ret void
+}
+
+define i64 @promoteGEPSunk(i1 %cond, ptr %base, i32 %i) {
+; CHECK-LABEL: define i64 @promoteGEPSunk(
+; CHECK-SAME: i1 [[COND:%.*]], ptr [[BASE:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[PROMOTED1]], 1
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S]]
+; CHECK-NEXT:    [[S2:%.*]] = add nsw i64 [[PROMOTED1]], 2
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S2]]
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_THEN2:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[ADDR]], align 8
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[ADDR2]], align 8
+; CHECK-NEXT:    [[R:%.*]] = add i64 [[V]], [[V2]]
+; CHECK-NEXT:    ret i64 [[R]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %add = add nsw i32 %i, 1
+  %s = sext i32 %add to i64
+  %addr = getelementptr inbounds i64, ptr %base, i64 %s
+  %add2 = add nsw i32 %i,  2
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, ptr %base, i64 %s2
+  br i1 %cond, label %if.then, label %if.then2
+if.then:
+  %v = load i64, ptr %addr
+  %v2 = load i64, ptr %addr2
+  %r = add i64 %v, %v2
+  ret i64 %r
+if.then2:
+  ret i64 0;
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
index 08dcefa0464030..9aec4dea63b9d2 100644
--- a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
@@ -123,6 +123,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind {
 ; RV64IZHINX-LABEL: bcvt_f16_to_sext_i16:
 ; RV64IZHINX:       # %bb.0:
 ; RV64IZHINX-NEXT:    fadd.h a0, a0, a1
+; RV64IZHINX-NEXT:    slli a0, a0, 48
+; RV64IZHINX-NEXT:    srai a0, a0, 48
 ; RV64IZHINX-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = bitcast half %1 to i16
diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
index f867fe46f0ec33..aac1a65e6c4fec 100644
--- a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
@@ -144,6 +144,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind {
 ; RV64IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; RV64IZHINXMIN-NEXT:    fadd.s a0, a0, a1
 ; RV64IZHINXMIN-NEXT:    fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT:    slli a0, a0, 48
+; RV64IZHINXMIN-NEXT:    srai a0, a0, 48
 ; RV64IZHINXMIN-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = bitcast half %1 to i16
diff --git a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
index f88216f95a7614..561289c1b77465 100644
--- a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
@@ -112,13 +112,13 @@ define dso_local i32 @test_cmplxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
 ; CHECK-LABEL: test_cmplxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnbxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07]
+; CHECK-NEXT:    cmpaexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmplxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnbxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07]
+; EGPR-NEXT:    cmpaexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 3)
@@ -129,95 +129,95 @@ define dso_local i64 @test_cmplxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
 ; CHECK-LABEL: test_cmplxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnbxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
+; CHECK-NEXT:    cmpaexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmplxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnbxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
+; EGPR-NEXT:    cmpaexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 3)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnbexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbexadd32:
+define dso_local i32 @test_cmpaxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpaxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07]
+; CHECK-NEXT:    cmpexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbexadd32:
+; EGPR-LABEL: test_cmpaxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07]
+; EGPR-NEXT:    cmpexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 4)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnbexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbexadd64:
+define dso_local i64 @test_cmpaxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpaxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
+; CHECK-NEXT:    cmpexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbexadd64:
+; EGPR-LABEL: test_cmpaxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
+; EGPR-NEXT:    cmpexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 4)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnbxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbxadd32:
+define dso_local i32 @test_cmpaexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpaexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07]
+; CHECK-NEXT:    cmpnexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbxadd32:
+; EGPR-LABEL: test_cmpaexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07]
+; EGPR-NEXT:    cmpnexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 5)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnbxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbxadd64:
+define dso_local i64 @test_cmpaexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpaexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
+; CHECK-NEXT:    cmpnexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbxadd64:
+; EGPR-LABEL: test_cmpaexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
+; EGPR-NEXT:    cmpnexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 5)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnlexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlexadd32:
+define dso_local i32 @test_cmpgxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpgxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; CHECK-NEXT:    cmpbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe6,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlexadd32:
+; EGPR-LABEL: test_cmpgxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; EGPR-NEXT:    cmpbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe6,0x07]
@@ -227,14 +227,14 @@ entry:
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnlexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlexadd64:
+define dso_local i64 @test_cmpgxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpgxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; CHECK-NEXT:    cmpbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe6,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlexadd64:
+; EGPR-LABEL: test_cmpgxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; EGPR-NEXT:    cmpbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe6,0x07]
@@ -244,34 +244,34 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnlxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlxadd32:
+define dso_local i32 @test_cmpgexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpgexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07]
+; CHECK-NEXT:    cmpaxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlxadd32:
+; EGPR-LABEL: test_cmpgexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07]
+; EGPR-NEXT:    cmpaxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 7)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnlxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlxadd64:
+define dso_local i64 @test_cmpgexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpgexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
+; CHECK-NEXT:    cmpaxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlxadd64:
+; EGPR-LABEL: test_cmpgexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
+; EGPR-NEXT:    cmpaxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 7)
@@ -380,14 +380,14 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnzxadd32:
+define dso_local i32 @test_cmpnexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpnexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; CHECK-NEXT:    cmpnpxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xeb,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnzxadd32:
+; EGPR-LABEL: test_cmpnexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; EGPR-NEXT:    cmpnpxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xeb,0x07]
@@ -397,14 +397,14 @@ entry:
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnzxadd64:
+define dso_local i64 @test_cmpnexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpnexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; CHECK-NEXT:    cmpnpxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xeb,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnzxadd64:
+; EGPR-LABEL: test_cmpnexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; EGPR-NEXT:    cmpnpxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xeb,0x07]
@@ -452,13 +452,13 @@ define dso_local i32 @test_cmppxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
 ; CHECK-LABEL: test_cmppxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnlxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07]
+; CHECK-NEXT:    cmpgexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmppxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnlxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07]
+; EGPR-NEXT:    cmpgexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 13)
@@ -469,13 +469,13 @@ define dso_local i64 @test_cmppxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
 ; CHECK-LABEL: test_cmppxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnlxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07]
+; CHECK-NEXT:    cmpgexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmppxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnlxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07]
+; EGPR-NEXT:    cmpgexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 13)
@@ -516,34 +516,34 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpzxadd32:
+define dso_local i32 @test_cmpexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnlexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07]
+; CHECK-NEXT:    cmpgxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpzxadd32:
+; EGPR-LABEL: test_cmpexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnlexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07]
+; EGPR-NEXT:    cmpgxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 15)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpzxadd64:
+define dso_local i64 @test_cmpexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnlexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07]
+; CHECK-NEXT:    cmpgxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpzxadd64:
+; EGPR-LABEL: test_cmpexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnlexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07]
+; EGPR-NEXT:    cmpgxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 15)
diff --git a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
index 2a54bebd5212c9..7a2e09af5b3db3 100644
--- a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
@@ -1,20 +1,20 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
 # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:   cmpnbexadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpaxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b
 
-# ATT:   cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b
 
-# ATT:   cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmpbexadd	%ecx, %edx, 123(%rax,%rbx,4)
@@ -49,52 +49,52 @@
 # INTEL: cmpbxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b
 
-# ATT:   cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b
 
-# ATT:   cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b
 
-# ATT:   cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b
 
-# ATT:   cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b
 
-# ATT:   cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b
 
-# ATT:   cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmplexadd	%ecx, %edx, 123(%rax,%rbx,4)
@@ -129,20 +129,20 @@
 # INTEL: cmplxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b
 
-# ATT:   cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b
 
-# ATT:   cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmpnoxadd	%ecx, %edx, 123(%rax,%rbx,4)
diff --git a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
index 62420db37f40d7..7b1599de263263 100644
--- a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
+++ b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
@@ -193,196 +193,196 @@
 # INTEL: cmplxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnbexadd %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbexadd %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %eax, %ecx, (%rip)
-# INTEL: cmpnbexadd dword ptr [rip], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, (%rip)
+# INTEL: cmpaxadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpaxadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnbexadd %eax, %ecx, 508(%rcx)
-# INTEL: cmpnbexadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 508(%rcx)
+# INTEL: cmpaxadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %eax, %ecx, -512(%rdx)
-# INTEL: cmpnbexadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, -512(%rdx)
+# INTEL: cmpaxadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbexadd %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpaxadd %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, (%rip)
-# INTEL: cmpnbexadd qword ptr [rip], r9, r10
+# ATT:   cmpaxadd %r10, %r9, (%rip)
+# INTEL: cmpaxadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpaxadd %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpaxadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnbexadd %r10, %r9, 1016(%rcx)
-# INTEL: cmpnbexadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpaxadd %r10, %r9, 1016(%rcx)
+# INTEL: cmpaxadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, -1024(%rdx)
-# INTEL: cmpnbexadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpaxadd %r10, %r9, -1024(%rdx)
+# INTEL: cmpaxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnbxadd %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbxadd %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, (%rip)
-# INTEL: cmpnbxadd dword ptr [rip], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, (%rip)
+# INTEL: cmpaexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnbxadd %eax, %ecx, 508(%rcx)
-# INTEL: cmpnbxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 508(%rcx)
+# INTEL: cmpaexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, -512(%rdx)
-# INTEL: cmpnbxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, -512(%rdx)
+# INTEL: cmpaexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnbxadd %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbxadd %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, (%rip)
-# INTEL: cmpnbxadd qword ptr [rip], r9, r10
+# ATT:   cmpaexadd %r10, %r9, (%rip)
+# INTEL: cmpaexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpaexadd %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpaexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnbxadd %r10, %r9, 1016(%rcx)
-# INTEL: cmpnbxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 1016(%rcx)
+# INTEL: cmpaexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, -1024(%rdx)
-# INTEL: cmpnbxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpaexadd %r10, %r9, -1024(%rdx)
+# INTEL: cmpaexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, (%rip)
-# INTEL: cmpnlexadd dword ptr [rip], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, (%rip)
+# INTEL: cmpgxadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnlexadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnlexadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpgxadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnlexadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpgxadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, (%rip)
-# INTEL: cmpnlexadd qword ptr [rip], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, (%rip)
+# INTEL: cmpgxadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpgxadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnlexadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnlexadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpgxadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnlexadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpgxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, (%rip)
-# INTEL: cmpnlxadd dword ptr [rip], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, (%rip)
+# INTEL: cmpgexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnlxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnlxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpgexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnlxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpgexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, (%rip)
-# INTEL: cmpnlxadd qword ptr [rip], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, (%rip)
+# INTEL: cmpgexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpgexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnlxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnlxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpgexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnlxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpgexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpnoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
@@ -529,52 +529,52 @@
 # INTEL: cmpnsxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, (%rip)
-# INTEL: cmpnzxadd dword ptr [rip], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, (%rip)
+# INTEL: cmpnexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnzxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnzxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpnexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnzxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpnexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, (%rip)
-# INTEL: cmpnzxadd qword ptr [rip], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, (%rip)
+# INTEL: cmpnexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpnexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnzxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnzxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpnexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnzxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpnexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
@@ -721,52 +721,52 @@
 # INTEL: cmpsxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, (%rip)
-# INTEL: cmpzxadd dword ptr [rip], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, (%rip)
+# INTEL: cmpexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpzxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpzxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpzxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, (%rip)
-# INTEL: cmpzxadd qword ptr [rip], r9, r10
+# ATT:   cmpexadd  %r10, %r9, (%rip)
+# INTEL: cmpexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpzxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpzxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpzxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpzxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpbexadd  %ecx, %r8d, (%rip)
diff --git a/llvm/test/MC/X86/apx/cmpccxadd-att.s b/llvm/test/MC/X86/apx/cmpccxadd-att.s
index d6ade869ca1d26..544871274a41d1 100644
--- a/llvm/test/MC/X86/apx/cmpccxadd-att.s
+++ b/llvm/test/MC/X86/apx/cmpccxadd-att.s
@@ -3,21 +3,21 @@
 
 # ERROR-COUNT-60: error:
 # ERROR-NOT: error:
-# CHECK: {evex}	cmpnbexadd	%ecx, %edx, 123(%eax,%ebx,4)
+# CHECK: {evex}	cmpaxadd	%ecx, %edx, 123(%eax,%ebx,4)
 # CHECK: encoding: [0x67,0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b]
-         {evex}	cmpnbexadd	%ecx, %edx, 123(%eax,%ebx,4)
+         {evex}	cmpaxadd	%ecx, %edx, 123(%eax,%ebx,4)
 
-# CHECK: {evex}	cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b]
-         {evex}	cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmpbexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b]
@@ -51,53 +51,53 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmpbxadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b]
-         {evex}	cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b]
-         {evex}	cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpexadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b]
-         {evex}	cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b]
-         {evex}	cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b]
-         {evex}	cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b]
-         {evex}	cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmplexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b]
@@ -131,21 +131,21 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmplxadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b]
-         {evex}	cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b]
-         {evex}	cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmpnoxadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b]
diff --git a/llvm/test/MC/X86/apx/cmpccxadd-intel.s b/llvm/test/MC/X86/apx/cmpccxadd-intel.s
index 4c44968fbf91ce..cace33e59d6a74 100644
--- a/llvm/test/MC/X86/apx/cmpccxadd-intel.s
+++ b/llvm/test/MC/X86/apx/cmpccxadd-intel.s
@@ -1,20 +1,20 @@
 # RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-# CHECK: {evex}	cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b]
-         {evex}	cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b]
-         {evex}	cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmpbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b]
@@ -48,53 +48,53 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmpbxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b]
-         {evex}	cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b]
-         {evex}	cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b]
-         {evex}	cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b]
-         {evex}	cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b]
-         {evex}	cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b]
-         {evex}	cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmplexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b]
@@ -128,21 +128,21 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmplxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b]
-         {evex}	cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b]
-         {evex}	cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmpnoxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b]
diff --git a/llvm/test/MC/X86/cmpccxadd-att-alias.s b/llvm/test/MC/X86/cmpccxadd-att-alias.s
index dcc0f105d7abc1..46c6588740b9cd 100644
--- a/llvm/test/MC/X86/cmpccxadd-att-alias.s
+++ b/llvm/test/MC/X86/cmpccxadd-att-alias.s
@@ -1,28 +1,28 @@
 // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpaexadd  %eax, %ecx, (%rip)
+          cmpnbxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpexadd  %eax, %ecx, (%rip)
+          cmpzxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpnexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnexadd  %eax, %ecx, (%rip)
+          cmpnzxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbexadd  %eax, %ecx, (%rip)
+// CHECK: cmpaxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpaxadd  %eax, %ecx, (%rip)
+          cmpnbexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlxadd  %eax, %ecx, (%rip)
+// CHECK: cmpgexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpgexadd  %eax, %ecx, (%rip)
+          cmpnlxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlexadd  %eax, %ecx, (%rip)
+// CHECK: cmpgxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpgxadd  %eax, %ecx, (%rip)
+          cmpnlexadd  %eax, %ecx, (%rip)
 
 // CHECK: cmpbxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
@@ -32,7 +32,7 @@
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
           cmpnaexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
           cmpncxadd  %eax, %ecx, (%rip)
 
diff --git a/llvm/test/MC/X86/cmpccxadd-att.s b/llvm/test/MC/X86/cmpccxadd-att.s
index c79cc55a15b81d..a7c9df91ab0c8e 100644
--- a/llvm/test/MC/X86/cmpccxadd-att.s
+++ b/llvm/test/MC/X86/cmpccxadd-att.s
@@ -196,197 +196,197 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff]
           cmplxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpaxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpaxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpaxadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpaxadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnbexadd  %eax, %ecx, (%rip)
+// CHECK: cmpaxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, (%rip)
+          cmpaxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpaxadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
+          cmpaxadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpaxadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, 508(%rcx)
+          cmpaxadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnbexadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpaxadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbexadd  %eax, %ecx, -512(%rdx)
+          cmpaxadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbexadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpaxadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpaxadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnbexadd  %r10, %r9, (%rip)
+// CHECK: cmpaxadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd  %r10, %r9, (%rip)
+          cmpaxadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnbexadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpaxadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbexadd  %r10, %r9, -256(,%rbp,2)
+          cmpaxadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnbexadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpaxadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbexadd  %r10, %r9, 1016(%rcx)
+          cmpaxadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnbexadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpaxadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbexadd  %r10, %r9, -1024(%rdx)
+          cmpaxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpaexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpaexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpaexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpaexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, (%rip)
+          cmpaexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpaexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpaexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpaexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, 508(%rcx)
+          cmpaexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnbxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpaexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbxadd  %eax, %ecx, -512(%rdx)
+          cmpaexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnbxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpaexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpaexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpaexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpaexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnbxadd  %r10, %r9, (%rip)
+// CHECK: cmpaexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd  %r10, %r9, (%rip)
+          cmpaexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnbxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpaexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbxadd  %r10, %r9, -256(,%rbp,2)
+          cmpaexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnbxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpaexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbxadd  %r10, %r9, 1016(%rcx)
+          cmpaexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnbxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpaexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbxadd  %r10, %r9, -1024(%rdx)
+          cmpaexadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnlexadd  %eax, %ecx, (%rip)
+// CHECK: cmpgxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, (%rip)
+          cmpgxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpgxadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
+          cmpgxadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpgxadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, 508(%rcx)
+          cmpgxadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnlexadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpgxadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlexadd  %eax, %ecx, -512(%rdx)
+          cmpgxadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnlexadd  %r10, %r9, (%rip)
+// CHECK: cmpgxadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd  %r10, %r9, (%rip)
+          cmpgxadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnlexadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpgxadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlexadd  %r10, %r9, -256(,%rbp,2)
+          cmpgxadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnlexadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpgxadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlexadd  %r10, %r9, 1016(%rcx)
+          cmpgxadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnlexadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpgxadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlexadd  %r10, %r9, -1024(%rdx)
+          cmpgxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnlxadd  %eax, %ecx, (%rip)
+// CHECK: cmpgexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, (%rip)
+          cmpgexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpgexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpgexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpgexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, 508(%rcx)
+          cmpgexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnlxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpgexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlxadd  %eax, %ecx, -512(%rdx)
+          cmpgexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnlxadd  %r10, %r9, (%rip)
+// CHECK: cmpgexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd  %r10, %r9, (%rip)
+          cmpgexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnlxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpgexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlxadd  %r10, %r9, -256(,%rbp,2)
+          cmpgexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnlxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpgexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlxadd  %r10, %r9, 1016(%rcx)
+          cmpgexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnlxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpgexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlxadd  %r10, %r9, -1024(%rdx)
+          cmpgexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpnoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -532,53 +532,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff]
           cmpnsxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpnexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, (%rip)
+          cmpnexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpnexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpnexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpnexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, 508(%rcx)
+          cmpnexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnzxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpnexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnzxadd  %eax, %ecx, -512(%rdx)
+          cmpnexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnzxadd  %r10, %r9, (%rip)
+// CHECK: cmpnexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd  %r10, %r9, (%rip)
+          cmpnexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnzxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpnexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnzxadd  %r10, %r9, -256(,%rbp,2)
+          cmpnexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnzxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpnexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00]
-          cmpnzxadd  %r10, %r9, 1016(%rcx)
+          cmpnexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnzxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpnexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnzxadd  %r10, %r9, -1024(%rdx)
+          cmpnexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -724,53 +724,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff]
           cmpsxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd  %eax, %ecx, (%rip)
+          cmpexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpzxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpzxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpzxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00]
-          cmpzxadd  %eax, %ecx, 508(%rcx)
+          cmpexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpzxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff]
-          cmpzxadd  %eax, %ecx, -512(%rdx)
+          cmpexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpzxadd  %r10, %r9, (%rip)
+// CHECK: cmpexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd  %r10, %r9, (%rip)
+          cmpexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpzxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpzxadd  %r10, %r9, -256(,%rbp,2)
+          cmpexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpzxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00]
-          cmpzxadd  %r10, %r9, 1016(%rcx)
+          cmpexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpzxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff]
-          cmpzxadd  %r10, %r9, -1024(%rdx)
+          cmpexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpbexadd  %ecx, %r8d, (%rip)
 // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/X86/cmpccxadd-intel-alias.s b/llvm/test/MC/X86/cmpccxadd-intel-alias.s
index f5c7a6b6a2e0a5..6228d7fc67231d 100644
--- a/llvm/test/MC/X86/cmpccxadd-intel-alias.s
+++ b/llvm/test/MC/X86/cmpccxadd-intel-alias.s
@@ -1,28 +1,28 @@
 // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: cmpnbxadd dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpaexadd dword ptr [rip], ecx, eax
+          cmpnbxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpexadd dword ptr [rip], ecx, eax
+          cmpzxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpnexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnexadd dword ptr [rip], ecx, eax
+          cmpnzxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rip], ecx, eax
+// CHECK: cmpaxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpaxadd dword ptr [rip], ecx, eax
+          cmpnbexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rip], ecx, eax
+// CHECK: cmpgexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpgexadd dword ptr [rip], ecx, eax
+          cmpnlxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rip], ecx, eax
+// CHECK: cmpgxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpgxadd dword ptr [rip], ecx, eax
+          cmpnlexadd dword ptr [rip], ecx, eax
 
 // CHECK: cmpbxadd  dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
@@ -32,7 +32,7 @@
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
           cmpnaexadd  dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbxadd  dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd  dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
           cmpncxadd  dword ptr [rip], ecx, eax
 
diff --git a/llvm/test/MC/X86/cmpccxadd-intel.s b/llvm/test/MC/X86/cmpccxadd-intel.s
index c03873e34decea..af7c6c3b61c949 100644
--- a/llvm/test/MC/X86/cmpccxadd-intel.s
+++ b/llvm/test/MC/X86/cmpccxadd-intel.s
@@ -192,197 +192,197 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff]
           cmplxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rip], ecx, eax
+// CHECK: cmpaxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd dword ptr [rip], ecx, eax
+          cmpaxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+          cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpaxadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbexadd dword ptr [rcx + 508], ecx, eax
+          cmpaxadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpaxadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbexadd dword ptr [rdx - 512], ecx, eax
+          cmpaxadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rip], r9, r10
+// CHECK: cmpaxadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd qword ptr [rip], r9, r10
+          cmpaxadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpaxadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+          cmpaxadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpaxadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbexadd qword ptr [rcx + 1016], r9, r10
+          cmpaxadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpaxadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbexadd qword ptr [rdx - 1024], r9, r10
+          cmpaxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd dword ptr [rip], ecx, eax
+          cmpaexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpaexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbxadd dword ptr [rcx + 508], ecx, eax
+          cmpaexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpaexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbxadd dword ptr [rdx - 512], ecx, eax
+          cmpaexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rip], r9, r10
+// CHECK: cmpaexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd qword ptr [rip], r9, r10
+          cmpaexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpaexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+          cmpaexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpaexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbxadd qword ptr [rcx + 1016], r9, r10
+          cmpaexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpaexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbxadd qword ptr [rdx - 1024], r9, r10
+          cmpaexadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rip], ecx, eax
+// CHECK: cmpgxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd dword ptr [rip], ecx, eax
+          cmpgxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+          cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpgxadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlexadd dword ptr [rcx + 508], ecx, eax
+          cmpgxadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpgxadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlexadd dword ptr [rdx - 512], ecx, eax
+          cmpgxadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rip], r9, r10
+// CHECK: cmpgxadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd qword ptr [rip], r9, r10
+          cmpgxadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpgxadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+          cmpgxadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpgxadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlexadd qword ptr [rcx + 1016], r9, r10
+          cmpgxadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpgxadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlexadd qword ptr [rdx - 1024], r9, r10
+          cmpgxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rip], ecx, eax
+// CHECK: cmpgexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd dword ptr [rip], ecx, eax
+          cmpgexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpgexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlxadd dword ptr [rcx + 508], ecx, eax
+          cmpgexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpgexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlxadd dword ptr [rdx - 512], ecx, eax
+          cmpgexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rip], r9, r10
+// CHECK: cmpgexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd qword ptr [rip], r9, r10
+          cmpgexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpgexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+          cmpgexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpgexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlxadd qword ptr [rcx + 1016], r9, r10
+          cmpgexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpgexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlxadd qword ptr [rdx - 1024], r9, r10
+          cmpgexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpnoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -528,53 +528,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff]
           cmpnsxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpnexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd dword ptr [rip], ecx, eax
+          cmpnexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpnexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00]
-          cmpnzxadd dword ptr [rcx + 508], ecx, eax
+          cmpnexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpnexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnzxadd dword ptr [rdx - 512], ecx, eax
+          cmpnexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rip], r9, r10
+// CHECK: cmpnexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd qword ptr [rip], r9, r10
+          cmpnexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpnexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+          cmpnexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpnexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00]
-          cmpnzxadd qword ptr [rcx + 1016], r9, r10
+          cmpnexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpnexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnzxadd qword ptr [rdx - 1024], r9, r10
+          cmpnexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -720,53 +720,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff]
           cmpsxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd dword ptr [rip], ecx, eax
+          cmpexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00]
-          cmpzxadd dword ptr [rcx + 508], ecx, eax
+          cmpexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff]
-          cmpzxadd dword ptr [rdx - 512], ecx, eax
+          cmpexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rip], r9, r10
+// CHECK: cmpexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd qword ptr [rip], r9, r10
+          cmpexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpzxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpzxadd qword ptr [2*rbp - 256], r9, r10
+          cmpexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00]
-          cmpzxadd qword ptr [rcx + 1016], r9, r10
+          cmpexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff]
-          cmpzxadd qword ptr [rdx - 1024], r9, r10
+          cmpexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpbexadd dword ptr [rip], r8d, ecx
 // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
index 56c7c7519f6945..a70f94e1521f0d 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
 ; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
 ; RUN:   -S < %s | FileCheck --check-prefix=INSTRUMENT %s
-; RUN: opt -passes=pgo-instr-gen,ctx-instr-lower -profile-context-root=an_entrypoint \
+; RUN: opt -passes=pgo-instr-gen,assign-guid,ctx-instr-lower -profile-context-root=an_entrypoint \
 ; RUN:   -profile-context-root=another_entrypoint_no_callees \
 ; RUN:   -S < %s | FileCheck --check-prefix=LOWERING %s
 
@@ -46,7 +46,7 @@ define void @foo(i32 %a, ptr %fct) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @foo(
-; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) !guid [[META0:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @foo, i64 6699318081062747564, i32 2, i32 2)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -104,7 +104,7 @@ define void @an_entrypoint(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @an_entrypoint(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -154,7 +154,7 @@ define void @another_entrypoint_no_callees(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @another_entrypoint_no_callees(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -188,7 +188,7 @@ define void @simple(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @simple(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META3:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -212,7 +212,7 @@ define i32 @no_callsites(i32 %a) {
 ; INSTRUMENT-NEXT:    ret i32 0
 ;
 ; LOWERING-LABEL: define i32 @no_callsites(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -243,7 +243,8 @@ define void @no_counters() {
 ; INSTRUMENT-NEXT:    call void @bar()
 ; INSTRUMENT-NEXT:    ret void
 ;
-; LOWERING-LABEL: define void @no_counters() {
+; LOWERING-LABEL: define void @no_counters(
+; LOWERING-SAME: ) !guid [[META5:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_counters, i64 5458232184388660970, i32 1, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -263,8 +264,15 @@ define void @no_counters() {
   ret void
 }
 ;.
-; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
-;.
 ; LOWERING: attributes #[[ATTR0:[0-9]+]] = { nounwind }
 ; LOWERING: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; LOWERING: [[META0]] = !{i64 6699318081062747564}
+; LOWERING: [[META1]] = !{i64 4909520559318251808}
+; LOWERING: [[META2]] = !{i64 -6371873725078000974}
+; LOWERING: [[META3]] = !{i64 -3006003237940970099}
+; LOWERING: [[META4]] = !{i64 5679753335911435902}
+; LOWERING: [[META5]] = !{i64 5458232184388660970}
+;.
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
index 18ac2f92aa39d4..cb8ab78dc0f414 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -12,7 +12,7 @@ declare void @bar()
 ;.
 define void @foo(i32 %a, ptr %fct) {
 ; CHECK-LABEL: define void @foo(
-; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr !guid [[META0:![0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
 ; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[A]], 0
 ; CHECK-NEXT:    br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
@@ -42,3 +42,5 @@ exit:
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
 ;.
+; CHECK: [[META0]] = !{i64 6699318081062747564}
+;.
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 9b37b49b3d49d2..cff5019f8e6ee4 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -32,15 +32,21 @@
 # RUN: FileCheck %s --check-prefix=AVAIL --input-file %t3.txt
 # RUN: FileCheck %s --check-prefix=UNAVAIL --input-file %t3.txt
 #
-# CHECK: << Total TLI yes SDK no:  12
+# CHECK: << Total TLI yes SDK no:  18
 # CHECK: >> Total TLI no  SDK yes: 0
 # CHECK: == Total TLI yes SDK yes: 248
 #
 # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
 # WRONG_DETAIL: >> TLI no  SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
-# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : '_Zn{{.*}}__hot_cold_t
+# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t
 # WRONG_DETAIL-COUNT-4: << TLI yes SDK no : '__size_returning_new{{.*}}
-# WRONG_SUMMARY: << Total TLI yes SDK no:  13{{$}}
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_num'
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numf'
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numl'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_num'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numf'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
+# WRONG_SUMMARY: << Total TLI yes SDK no:  19{{$}}
 # WRONG_SUMMARY: >> Total TLI no  SDK yes: 1{{$}}
 # WRONG_SUMMARY: == Total TLI yes SDK yes: 247
 #
@@ -48,8 +54,8 @@
 ## the exact count first; the two directives should add up to that.
 ## Yes, this means additions to TLI will fail this test, but the argument
 ## to -COUNT can't be an expression.
-# AVAIL: TLI knows 493 symbols, 260 available
-# AVAIL-COUNT-260: {{^}} available
+# AVAIL: TLI knows 499 symbols, 266 available
+# AVAIL-COUNT-266: {{^}} available
 # AVAIL-NOT:       {{^}} available
 # UNAVAIL-COUNT-233: not available
 # UNAVAIL-NOT:       not available
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 68bf8e670771ee..ff7dec5bee31df 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -212,6 +212,12 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare double @fmin(double, double)\n"
       "declare float @fminf(float, float)\n"
       "declare x86_fp80 @fminl(x86_fp80, x86_fp80)\n"
+      "declare double @fmaximum_num(double, double)\n"
+      "declare float @fmaximum_numf(float, float)\n"
+      "declare x86_fp80 @fmaximum_numl(x86_fp80, x86_fp80)\n"
+      "declare double @fminimum_num(double, double)\n"
+      "declare float @fminimum_numf(float, float)\n"
+      "declare x86_fp80 @fminimum_numl(x86_fp80, x86_fp80)\n"
       "declare double @fmod(double, double)\n"
       "declare float @fmodf(float, float)\n"
       "declare x86_fp80 @fmodl(x86_fp80, x86_fp80)\n"
diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
index 074247e6e7d184..b9414be98623af 100644
--- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
@@ -238,9 +238,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
   SDLoc DL;
   auto Int32VT = EVT::getIntegerVT(Context, 32);
   auto Int64VT = EVT::getIntegerVT(Context, 64);
+  auto FloatVT = EVT::getFloatingPointVT(32);
 
   SDValue Op0 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT);
   SDValue Op1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int64VT);
+  SDValue Op2 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, FloatVT);
 
   SDValue ZExt = DAG->getNode(ISD::ZERO_EXTEND, DL, Int64VT, Op0);
   SDValue SExt = DAG->getNode(ISD::SIGN_EXTEND, DL, Int64VT, Op0);
@@ -252,6 +254,9 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
 
   SDValue VScale = DAG->getVScale(DL, Int32VT, APInt::getMaxValue(32));
 
+  SDValue FPToSI = DAG->getNode(ISD::FP_TO_SINT, DL, Int32VT, Op2);
+  SDValue FPToUI = DAG->getNode(ISD::FP_TO_UINT, DL, Int32VT, Op2);
+
   using namespace SDPatternMatch;
   EXPECT_TRUE(sd_match(ZExt, m_UnaryOp(ISD::ZERO_EXTEND, m_Value())));
   EXPECT_TRUE(sd_match(SExt, m_SExt(m_Value())));
@@ -263,6 +268,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
   EXPECT_FALSE(sd_match(Sub, m_Neg(m_Value())));
   EXPECT_FALSE(sd_match(Neg, m_Not(m_Value())));
   EXPECT_TRUE(sd_match(VScale, m_VScale(m_Value())));
+
+  EXPECT_TRUE(sd_match(FPToUI, m_FPToUI(m_Value())));
+  EXPECT_TRUE(sd_match(FPToSI, m_FPToSI(m_Value())));
+  EXPECT_FALSE(sd_match(FPToUI, m_FPToSI(m_Value())));
+  EXPECT_FALSE(sd_match(FPToSI, m_FPToUI(m_Value())));
 }
 
 TEST_F(SelectionDAGPatternMatchTest, matchConstants) {
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 113bb578f6bc3b..dcb2e614f4c40d 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -19,6 +19,111 @@ using namespace llvm;
 
 namespace {
 
+// Rejected strings with expected diagnostics. TODO: Split into multiple TESTs.
+TEST(DataLayoutTest, ParseErrors) {
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("^"),
+      FailedWithMessage("Unknown specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m:v"),
+      FailedWithMessage("Unknown mangling in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("n0"),
+      FailedWithMessage("Zero width native integer type in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p16777216:64:64:64"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("a1:64"),
+      FailedWithMessage("Sized aggregate specification in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("a:"),
+      FailedWithMessage("Trailing separator in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:48:52"),
+      FailedWithMessage("number of bits must be a byte width multiple"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("e-p"),
+      FailedWithMessage(
+          "Missing size specification for pointer in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("e-p:64"),
+      FailedWithMessage(
+          "Missing alignment specification for pointer in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m"),
+      FailedWithMessage("Expected mangling specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m."),
+      FailedWithMessage("Unexpected trailing characters after mangling "
+                        "specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("f"),
+      FailedWithMessage(
+          "Missing alignment specification in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse(":32"),
+      FailedWithMessage(
+          "Expected token before separator in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:64:16"),
+      FailedWithMessage(
+          "Preferred alignment cannot be less than the ABI alignment"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:16:16777216"),
+      FailedWithMessage(
+          "Invalid preferred alignment, must be a 16bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:16777216:16777216"),
+      FailedWithMessage("Invalid ABI alignment, must be a 16bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i16777216:16:16"),
+      FailedWithMessage("Invalid bit width, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:32:32:16"),
+      FailedWithMessage(
+          "Preferred alignment cannot be less than the ABI alignment"));
+  EXPECT_THAT_EXPECTED(DataLayout::parse("p:0:32:32"),
+                       FailedWithMessage("Invalid pointer size of 0 bytes"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:24:64"),
+      FailedWithMessage("Pointer ABI alignment must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:64:24"),
+      FailedWithMessage("Pointer preferred alignment must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:64:64:128"),
+      FailedWithMessage("Index width cannot be larger than pointer width"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("v128:0:128"),
+      FailedWithMessage(
+          "ABI alignment specification must be >0 for non-aggregate types"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i32:24:32"),
+      FailedWithMessage("Invalid ABI alignment, must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i32:32:24"),
+      FailedWithMessage("Invalid preferred alignment, must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("A16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("G16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("P16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("Fi24"),
+      FailedWithMessage("Alignment is neither 0 nor a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i8:16"),
+      FailedWithMessage("Invalid ABI alignment, i8 must be naturally aligned"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("S24"),
+      FailedWithMessage("Alignment is neither 0 nor a power of 2"));
+}
+
 TEST(DataLayoutTest, CopyAssignmentInvalidatesStructLayout) {
   DataLayout DL1 = cantFail(DataLayout::parse("p:32:32"));
   DataLayout DL2 = cantFail(DataLayout::parse("p:64:64"));
diff --git a/llvm/utils/git/linkify b/llvm/utils/git/linkify
new file mode 100755
index 00000000000000..9fcadd758492cf
--- /dev/null
+++ b/llvm/utils/git/linkify
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# This script linkifies (i.e. makes clickable in the terminal) text that appears
+# to be a pull request or issue reference (e.g. #12345 or PR12345) or a full
+# 40-character hexadecimal commit hash. You can configure git to automatically
+# send the output of commands that pipe their output through a pager, such as
+# `git log` and `git show`, through this script by running this command from
+# within your LLVM checkout:
+#
+# git config core.pager 'llvm/utils/git/linkify | pager'
+#
+# The pager command is run from the root of the repository even if the git
+# command is run from a subdirectory, so the relative path should always work.
+#
+# It requires GNU sed and OSC 8 support in the terminal. For a list of
+# compatible terminals, see https://github.com/Alhadis/OSC8-Adoption
+
+sed \
+  -e 's,\(#\|\bPR\)\([0-9]\+\),\x1b]8;;https://github.com/llvm/llvm-project/issues/\2\x1b\\\0\x1b]8;;\x1b\\,gi' \
+  -e 's,[0-9a-f]\{40\},\x1b]8;;https://github.com/llvm/llvm-project/commit/\0\x1b\\\0\x1b]8;;\x1b\\,g'
diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index da7fa86fd39173..cc903f9e3a1520 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -767,6 +767,10 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
         # echo-appending to a file.
         # FIXME: Standardize on the builtin echo implementation. We can use a
         # temporary file to sidestep blocking pipe write issues.
+
+        # Ensure args[0] is hashable.
+        args[0] = expand_glob(args[0], cmd_shenv.cwd)[0]
+
         inproc_builtin = inproc_builtins.get(args[0], None)
         if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1):
             # env calling an in-process builtin is useless, so we take the safe
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py b/llvm/utils/lit/tests/Inputs/check_path.py
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py
rename to llvm/utils/lit/tests/Inputs/check_path.py
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
new file mode 100644
index 00000000000000..7375a7497e5bec
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
@@ -0,0 +1,83 @@
+## Test cat command with a single file.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp.write
+# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write
+# RUN: %{python} %S/../check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s
+# FILE-EXISTS: True
+# CAT-OUTPUT: abcdefgh
+#
+## Test cat command with multiple files.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: echo "mnopqrst" > %T/testCat/temp3.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write
+# RUN: %{python} %S/../check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s
+# MULTI-FILE-EXISTS: True
+# MULTI-CAT-OUTPUT: abcdefgh
+# MULTI-CAT-OUTPUT-NEXT: efghijkl
+# MULTI-CAT-OUTPUT-NEXT: mnopqrst
+#
+## Test cat command with multiple files and piped output to FileCheck.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s
+# PIPED-CAT-OUTPUT: abcdefgh
+# PIPED-CAT-OUTPUT-NEXT: efghijkl
+#
+## Test cat command with multiple files and glob expressions.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "cvbnm" > %T/testCat/temp1.write
+# RUN: echo "qwerty" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s
+# GLOB-CAT-OUTPUT: cvbnm
+# GLOB-CAT-OUTPUT-NEXT: qwerty
+#
+## Test cat command with -v option
+#
+# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s
+# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
+#
+## Test cat command with --show-nonprinting option
+#
+# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s
+# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin b/llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg
new file mode 100644
index 00000000000000..8f197946e28b5c
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg
@@ -0,0 +1,8 @@
+import lit.formats
+
+config.name = "shtest-cat"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable)))
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
index 7267b9b9ef5aba..75ce8b7733ad7d 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
@@ -2,13 +2,13 @@
 # Check force remove commands success whether the file does or doesn't exist.
 #
 # RUN: rm -f %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s
 # RUN: echo "create a temp file" > %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=FILE-EXIST < %t.out %s
 # RUN: rm -f %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s
 #
 # REMOVE-FILE: False
@@ -19,14 +19,14 @@
 #
 # Check the mkdir command with -p option.
 # RUN: rm -f -r %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s
 # RUN: mkdir -p %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=MAKE-PARENT-DIR < %t.out %s
 # RUN: rm -f %T/test || true
 # RUN: rm -f -r %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s
 #
 # MAKE-PARENT-DIR: True
@@ -36,13 +36,13 @@
 #
 # RUN: rm -rf %T/test1
 # RUN: mkdir %T/test1
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s
 # RUN: cd %T/test1 && mkdir foo
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s
 # RUN: cd %T && rm -rf %T/test1
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-DIR < %t.out %s
 #
 # MAKE-DIR: True
@@ -52,16 +52,16 @@
 #
 # RUN: rm -rf %T/test
 # RUN: mkdir -p %T/test/test1 %T/test/test2
-# RUN: %{python} %S/check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out
 # RUN: FileCheck --check-prefix=DIRS-EXIST < %t.out %s
 # RUN: mkdir %T/test || true
 # RUN: echo "create a temp file" > %T/test/temp.write
 # RUN: echo "create a temp1 file" > %T/test/test1/temp1.write
 # RUN: echo "create a temp2 file" > %T/test/test2/temp2.write
-# RUN: %{python} %S/check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out
+# RUN: %{python} %S/../check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out
 # RUN: FileCheck --check-prefix=FILES-EXIST < %t.out %s
 # RUN: rm -r -f %T/*
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-ALL < %t.out %s
 #
 # DIRS-EXIST: True
@@ -85,87 +85,3 @@
 # RUN: cd %T/dir1 && echo "hello" > temp1.txt
 # RUN: cd %T/dir2 && echo "hello" > temp2.txt
 # RUN: diff temp2.txt ../dir1/temp1.txt
-#
-# Check cat command with single file.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp.write
-# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write
-# RUN: %{python} %S/check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out
-# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s
-# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s
-# FILE-EXISTS: True
-# CAT-OUTPUT: abcdefgh
-#
-# Check cat command with multiple files.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp1.write
-# RUN: echo "efghijkl" > %T/testCat/temp2.write
-# RUN: echo "mnopqrst" > %T/testCat/temp3.write
-# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write
-# RUN: %{python} %S/check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out
-# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s
-# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s
-# MULTI-FILE-EXISTS: True
-# MULTI-CAT-OUTPUT: abcdefgh
-# MULTI-CAT-OUTPUT-NEXT: efghijkl
-# MULTI-CAT-OUTPUT-NEXT: mnopqrst
-#
-# Check cat command with multiple files and piped output to FileCheck.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp1.write
-# RUN: echo "efghijkl" > %T/testCat/temp2.write
-# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s
-# PIPED-CAT-OUTPUT: abcdefgh
-# PIPED-CAT-OUTPUT-NEXT: efghijkl
-#
-# Check cat command with multiple files and glob expressions.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "cvbnm" > %T/testCat/temp1.write
-# RUN: echo "qwerty" > %T/testCat/temp2.write
-# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s
-# GLOB-CAT-OUTPUT: cvbnm
-# GLOB-CAT-OUTPUT-NEXT: qwerty
-#
-# Check cat command with -v option
-#
-# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s
-# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
-# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
-# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
-# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
-# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
-# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
-# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
-# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
-# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
-# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
-# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
-# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
-# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
-# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
-#
-# Check cat command with -show-nonprinting option
-#
-# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s
-# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
-# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
-# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
-# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
-# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
-# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
-# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
-# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
-# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
-# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
-# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
-# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
-# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
-# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
diff --git a/llvm/utils/lit/tests/shtest-cat.py b/llvm/utils/lit/tests/shtest-cat.py
new file mode 100644
index 00000000000000..5efe25c41684a1
--- /dev/null
+++ b/llvm/utils/lit/tests/shtest-cat.py
@@ -0,0 +1,23 @@
+## Test the cat command.
+#
+# RUN: not %{lit} -a -v %{inputs}/shtest-cat \
+# RUN: | FileCheck -match-full-lines %s
+# END.
+
+# CHECK: FAIL: shtest-cat :: cat-error-0.txt ({{[^)]*}})
+# CHECK: cat -b temp1.txt
+# CHECK: # .---command stderr{{-*}}
+# CHECK-NEXT: # | Unsupported: 'cat':  option -b not recognized
+# CHECK: # error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-cat :: cat-error-1.txt ({{[^)]*}})
+# CHECK: cat temp1.txt
+# CHECK: # .---command stderr{{-*}}
+# CHECK-NEXT: # | [Errno 2] No such file or directory: 'temp1.txt'
+# CHECK: # error: command failed with exit status: 1
+
+# CHECK: PASS: shtest-cat :: cat.txt ({{[^)]*}})
+
+# CHECK: Total Discovered Tests: 3
+# CHECK-NEXT: Passed: 1 {{\([0-9]*\.[0-9]*%\)}}
+# CHECK-NEXT: Failed: 2 {{\([0-9]*\.[0-9]*%\)}}
diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py
index 86851194880620..8f2b865f333a57 100644
--- a/llvm/utils/lit/tests/shtest-shell.py
+++ b/llvm/utils/lit/tests/shtest-shell.py
@@ -18,22 +18,6 @@
 
 # CHECK: -- Testing:
 
-# CHECK: FAIL: shtest-shell :: cat-error-0.txt
-# CHECK: *** TEST 'shtest-shell :: cat-error-0.txt' FAILED ***
-# CHECK: cat -b temp1.txt
-# CHECK: # .---command stderr{{-*}}
-# CHECK: # | Unsupported: 'cat':  option -b not recognized
-# CHECK: # error: command failed with exit status: 1
-# CHECK: ***
-
-# CHECK: FAIL: shtest-shell :: cat-error-1.txt
-# CHECK: *** TEST 'shtest-shell :: cat-error-1.txt' FAILED ***
-# CHECK: cat temp1.txt
-# CHECK: # .---command stderr{{-*}}
-# CHECK: # | [Errno 2] No such file or directory: 'temp1.txt'
-# CHECK: # error: command failed with exit status: 1
-# CHECK: ***
-
 # CHECK: FAIL: shtest-shell :: colon-error.txt
 # CHECK: *** TEST 'shtest-shell :: colon-error.txt' FAILED ***
 # CHECK: :
@@ -651,4 +635,4 @@
 
 # CHECK: PASS: shtest-shell :: valid-shell.txt
 # CHECK: Unresolved Tests (1)
-# CHECK: Failed Tests (38)
+# CHECK: Failed Tests (36)
diff --git a/mlir/include/mlir/Analysis/SliceWalk.h b/mlir/include/mlir/Analysis/SliceWalk.h
new file mode 100644
index 00000000000000..481c5690c533ba
--- /dev/null
+++ b/mlir/include/mlir/Analysis/SliceWalk.h
@@ -0,0 +1,98 @@
+//===- SliceWalk.h - Helpers for performing IR slice walks ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_SLICEWALK_H
+#define MLIR_ANALYSIS_SLICEWALK_H
+
+#include "mlir/IR/ValueRange.h"
+
+namespace mlir {
+
+/// A class to signal how to proceed with the walk of the backward slice:
+/// - Interrupt: Stops the walk.
+/// - AdvanceTo: Continues the walk to user-specified values.
+/// - Skip: Continues the walk, but skips the predecessors of the current value.
+class WalkContinuation {
+public:
+  enum class WalkAction {
+    /// Stops the walk.
+    Interrupt,
+    /// Continues the walk to user-specified values.
+    AdvanceTo,
+    /// Continues the walk, but skips the predecessors of the current value.
+    Skip
+  };
+
+  WalkContinuation(WalkAction action, mlir::ValueRange nextValues)
+      : action(action), nextValues(nextValues) {}
+
+  /// Interrupts the walk; the diagnostic argument itself is not stored.
+  explicit WalkContinuation(mlir::Diagnostic &&)
+      : action(WalkAction::Interrupt) {}
+
+  /// Interrupts the walk; the in-flight diagnostic itself is not stored.
+  explicit WalkContinuation(mlir::InFlightDiagnostic &&)
+      : action(WalkAction::Interrupt) {}
+
+  /// Creates a continuation that interrupts the walk.
+  static WalkContinuation interrupt() {
+    return WalkContinuation(WalkAction::Interrupt, {});
+  }
+
+  /// Creates a continuation that adds the user-specified `nextValues` to the
+  /// work list and advances the walk.
+  static WalkContinuation advanceTo(mlir::ValueRange nextValues) {
+    return WalkContinuation(WalkAction::AdvanceTo, nextValues);
+  }
+
+  /// Creates a continuation that advances the walk without adding any
+  /// predecessor values to the work list.
+  static WalkContinuation skip() {
+    return WalkContinuation(WalkAction::Skip, {});
+  }
+
+  /// Returns true if this continuation interrupts the walk.
+  bool wasInterrupted() const { return action == WalkAction::Interrupt; }
+
+  /// Returns true if this continuation skips the current value's predecessors.
+  bool wasSkipped() const { return action == WalkAction::Skip; }
+
+  /// Returns true if this continuation advances to user-specified values.
+  bool wasAdvancedTo() const { return action == WalkAction::AdvanceTo; }
+
+  /// Returns the values to visit next; empty for interrupt() and skip().
+  mlir::ArrayRef<mlir::Value> getNextValues() const { return nextValues; }
+
+private:
+  WalkAction action;
+  /// The next values to continue the walk with.
+  mlir::SmallVector<mlir::Value> nextValues;
+};
+
+/// A callback that is invoked for each value encountered during the walk of the
+/// slice. The callback takes the current value, and returns the walk
+/// continuation, which determines if the walk should proceed and if yes, with
+/// which values.
+using WalkCallback = mlir::function_ref<WalkContinuation(mlir::Value)>;
+
+/// Walks the slice starting from the `rootValues` using a depth-first
+/// traversal, calling `walkCallback` once for each distinct value reached and
+/// following the continuation it returns. Returns that continuation if it
+/// interrupts the walk; otherwise returns a skip continuation when done.
+WalkContinuation walkSlice(mlir::ValueRange rootValues,
+                           WalkCallback walkCallback);
+
+/// Computes a vector of all control predecessors of `value`. Relies on
+/// RegionBranchOpInterface and BranchOpInterface to determine predecessors.
+/// Returns nullopt if `value` has no predecessors or when the relevant
+/// operations are missing the interface implementations.
+std::optional<SmallVector<Value>> getControlFlowPredecessors(Value value);
+
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_SLICEWALK_H
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index 7871b46724a03d..0be0f8ef2d7a0c 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -897,6 +897,8 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> {
   let assemblyFormat = [{
     $input `,` $table attr-dict `:` `(` type($input) `,` type($table) `)` `->` type($output)
   }];
+
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 38d8415d81c72d..609cb34309829e 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -29,6 +29,7 @@ add_mlir_library(MLIRAnalysis
   Liveness.cpp
   CFGLoopInfo.cpp
   SliceAnalysis.cpp
+  SliceWalk.cpp
   TopologicalSortUtils.cpp
 
   AliasAnalysis/LocalAliasAnalysis.cpp
diff --git a/mlir/lib/Analysis/SliceWalk.cpp b/mlir/lib/Analysis/SliceWalk.cpp
new file mode 100644
index 00000000000000..9d770639dc53ca
--- /dev/null
+++ b/mlir/lib/Analysis/SliceWalk.cpp
@@ -0,0 +1,139 @@
+#include "mlir/Analysis/SliceWalk.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+
+using namespace mlir;
+
+WalkContinuation mlir::walkSlice(ValueRange rootValues,
+                                 WalkCallback walkCallback) {
+  // Depth-first traversal: `pending` acts as a stack seeded with the roots.
+  SmallVector<Value> pending = rootValues;
+  llvm::SmallDenseSet<Value, 16> visited;
+  while (!pending.empty()) {
+    Value value = pending.pop_back_val();
+
+    // Visit each value at most once, even if it is reachable several ways.
+    if (!visited.insert(value).second)
+      continue;
+
+    // Let the callback decide how the walk proceeds from this value.
+    WalkContinuation step = walkCallback(value);
+    if (step.wasInterrupted())
+      return step;
+
+    // A skip contributes nothing; only an advance pushes further values.
+    if (!step.wasSkipped()) {
+      assert(step.wasAdvancedTo());
+      ArrayRef<Value> nextValues = step.getNextValues();
+      pending.append(nextValues.begin(), nextValues.end());
+    }
+  }
+
+  // The work list is exhausted and the callback never interrupted the walk.
+  return WalkContinuation::skip();
+}
+
+/// Collects the operand at position `operandNumber` from every predecessor of
+/// the `successor` region within `regionOp`.
+static SmallVector<Value>
+getRegionPredecessorOperands(RegionBranchOpInterface regionOp,
+                             RegionSuccessor successor,
+                             unsigned operandNumber) {
+  // Returns true if one of `candidates` targets the same region as `successor`.
+  auto targetsSuccessor = [&successor](ArrayRef<RegionSuccessor> candidates) {
+    return llvm::any_of(candidates, [&successor](RegionSuccessor candidate) {
+      return candidate.getSuccessor() == successor.getSuccessor();
+    });
+  };
+
+  SmallVector<Value> forwardedOperands;
+
+  // The region operation itself is a predecessor if it can enter `successor`.
+  SmallVector<Attribute> entryAttrs(regionOp->getNumOperands());
+  SmallVector<RegionSuccessor> entrySuccessors;
+  regionOp.getEntrySuccessorRegions(entryAttrs, entrySuccessors);
+  if (targetsSuccessor(entrySuccessors))
+    forwardedOperands.push_back(
+        regionOp.getEntrySuccessorOperands(successor)[operandNumber]);
+
+  // So is every region terminator that can branch to `successor`.
+  for (Region &region : regionOp->getRegions()) {
+    for (Block &block : region) {
+      auto terminator =
+          dyn_cast<RegionBranchTerminatorOpInterface>(block.getTerminator());
+      if (!terminator)
+        continue;
+
+      // Ask the terminator which regions it may branch to.
+      SmallVector<Attribute> terminatorAttrs(terminator->getNumOperands());
+      SmallVector<RegionSuccessor> terminatorSuccessors;
+      terminator.getSuccessorRegions(terminatorAttrs, terminatorSuccessors);
+      if (!targetsSuccessor(terminatorSuccessors))
+        continue;
+
+      forwardedOperands.push_back(
+          terminator.getSuccessorOperands(successor)[operandNumber]);
+    }
+  }
+
+  return forwardedOperands;
+}
+
+/// Gathers the operands that the predecessor terminators forward to `blockArg`.
+/// Yields nullopt as soon as a terminator lacks the BranchOpInterface.
+static std::optional<SmallVector<Value>>
+getBlockPredecessorOperands(BlockArgument blockArg) {
+  Block *owner = blockArg.getOwner();
+  unsigned argNumber = blockArg.getArgNumber();
+
+  SmallVector<Value> forwarded;
+  // The iterator is needed to learn which successor slot targets `owner`.
+  for (auto it = owner->pred_begin(); it != owner->pred_end(); ++it) {
+    Operation *terminator = (*it)->getTerminator();
+    auto branch = dyn_cast<BranchOpInterface>(terminator);
+    if (!branch)
+      return std::nullopt;
+    // A null value means the argument is produced by the terminator itself,
+    // so there is no forwarded operand to record.
+    Value operand =
+        branch.getSuccessorOperands(it.getSuccessorIndex())[argNumber];
+    if (operand)
+      forwarded.push_back(operand);
+  }
+  return forwarded;
+}
+
+std::optional<SmallVector<Value>>
+mlir::getControlFlowPredecessors(Value value) {
+  // Op results: collect what a region-branch op forwards to its own results.
+  if (OpResult opResult = dyn_cast<OpResult>(value)) {
+    auto regionOp = dyn_cast<RegionBranchOpInterface>(opResult.getOwner());
+    // If the interface is not implemented, there are no control flow
+    // predecessors to work with.
+    if (!regionOp)
+      return std::nullopt;
+    // Collect the operands forwarded to the results of the region operation.
+    RegionSuccessor region(regionOp->getResults());
+    SmallVector<Value> predecessorOperands = getRegionPredecessorOperands(
+        regionOp, region, opResult.getResultNumber());
+    return predecessorOperands;
+  }
+
+  auto blockArg = cast<BlockArgument>(value);
+  Block *block = blockArg.getOwner();
+  // Search the region predecessor operands for structured control flow.
+  if (block->isEntryBlock()) {
+    if (auto regionBranchOp =
+            dyn_cast<RegionBranchOpInterface>(block->getParentOp())) {
+      RegionSuccessor region(blockArg.getParentRegion());
+      SmallVector<Value> predecessorOperands = getRegionPredecessorOperands(
+          regionBranchOp, region, blockArg.getArgNumber());
+      return predecessorOperands;
+    }
+    // If the interface is not implemented, there are no control flow
+    // predecessors to work with.
+    return std::nullopt;
+  }
+
+  // Search the block predecessor operands for unstructured control flow.
+  return getBlockPredecessorOperands(blockArg);
+}
diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index 8eba76a9abee8d..504f63b48c9433 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.h"
+#include "mlir/Analysis/SliceWalk.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/DataLayoutInterfaces.h"
@@ -221,86 +222,45 @@ static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
   return ArrayAttr::get(lhs.getContext(), result);
 }
 
-/// Attempts to return the underlying pointer value that `pointerValue` is based
-/// on. This traverses down the chain of operations to the last operation
-/// producing the base pointer and returns it. If it encounters an operation it
-/// cannot further traverse through, returns the operation's result.
-static Value getUnderlyingObject(Value pointerValue) {
-  while (true) {
-    if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
-      pointerValue = gepOp.getBase();
-      continue;
-    }
-
-    if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
-      pointerValue = addrCast.getOperand();
-      continue;
-    }
-
-    break;
-  }
-
-  return pointerValue;
-}
-
 /// Attempts to return the set of all underlying pointer values that
 /// `pointerValue` is based on. This function traverses through select
-/// operations and block arguments unlike getUnderlyingObject.
-static SmallVector<Value> getUnderlyingObjectSet(Value pointerValue) {
+/// operations and block arguments; fails on block arguments it cannot trace.
+static FailureOr<SmallVector<Value>>
+getUnderlyingObjectSet(Value pointerValue) {
   SmallVector<Value> result;
-
-  SmallVector<Value> workList{pointerValue};
-  // Avoid dataflow loops.
-  SmallPtrSet<Value, 4> seen;
-  do {
-    Value current = workList.pop_back_val();
-    current = getUnderlyingObject(current);
-
-    if (!seen.insert(current).second)
-      continue;
-
-    if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
-      workList.push_back(selectOp.getTrueValue());
-      workList.push_back(selectOp.getFalseValue());
-      continue;
+  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
+    if (auto gepOp = val.getDefiningOp<LLVM::GEPOp>())
+      return WalkContinuation::advanceTo(gepOp.getBase());
+
+    if (auto addrCast = val.getDefiningOp<LLVM::AddrSpaceCastOp>())
+      return WalkContinuation::advanceTo(addrCast.getOperand());
+
+    // TODO: Add a SelectLikeOpInterface and use it in the slicing utility.
+    if (auto selectOp = val.getDefiningOp<LLVM::SelectOp>())
+      return WalkContinuation::advanceTo(
+          {selectOp.getTrueValue(), selectOp.getFalseValue()});
+
+    // Attempt to advance to control flow predecessors.
+    std::optional<SmallVector<Value>> controlFlowPredecessors =
+        getControlFlowPredecessors(val);
+    if (controlFlowPredecessors)
+      return WalkContinuation::advanceTo(*controlFlowPredecessors);
+
+    // For all non-control flow results, consider `val` an underlying object.
+    if (isa<OpResult>(val)) {
+      result.push_back(val);
+      return WalkContinuation::skip();
     }
 
-    if (auto blockArg = dyn_cast<BlockArgument>(current)) {
-      Block *parentBlock = blockArg.getParentBlock();
-
-      // Attempt to find all block argument operands for every predecessor.
-      // If any operand to the block argument wasn't found in a predecessor,
-      // conservatively add the block argument to the result set.
-      SmallVector<Value> operands;
-      bool anyUnknown = false;
-      for (auto iter = parentBlock->pred_begin();
-           iter != parentBlock->pred_end(); iter++) {
-        auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
-        if (!branch) {
-          result.push_back(blockArg);
-          anyUnknown = true;
-          break;
-        }
-
-        Value operand = branch.getSuccessorOperands(
-            iter.getSuccessorIndex())[blockArg.getArgNumber()];
-        if (!operand) {
-          result.push_back(blockArg);
-          anyUnknown = true;
-          break;
-        }
-
-        operands.push_back(operand);
-      }
-
-      if (!anyUnknown)
-        llvm::append_range(workList, operands);
-
-      continue;
-    }
+    // If this place is reached, `val` is a block argument that is not
+    // understood. Therefore, we conservatively interrupt.
+    // Note: Dealing with function arguments is not necessary, as the slice
+    // would have to go through an SSACopyOp first.
+    return WalkContinuation::interrupt();
+  });
 
-    result.push_back(current);
-  } while (!workList.empty());
+  if (walkResult.wasInterrupted())
+    return failure();
 
   return result;
 }
@@ -363,9 +323,14 @@ static void createNewAliasScopesFromNoAliasParameter(
 
       // Find the set of underlying pointers that this pointer is based on.
       SmallPtrSet<Value, 4> basedOnPointers;
-      for (Value pointer : pointerArgs)
-        llvm::copy(getUnderlyingObjectSet(pointer),
+      for (Value pointer : pointerArgs) {
+        FailureOr<SmallVector<Value>> underlyingObjectSet =
+            getUnderlyingObjectSet(pointer);
+        if (failed(underlyingObjectSet))
+          return;
+        llvm::copy(*underlyingObjectSet,
                    std::inserter(basedOnPointers, basedOnPointers.begin()));
+      }
 
       bool aliasesOtherKnownObject = false;
       // Go through the based on pointers and check that they are either:
diff --git a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
index 88d56a8fbec749..a45b79194a7580 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
@@ -234,6 +234,46 @@ struct ConvertMemRefAssumeAlignment final
   }
 };
 
+//===----------------------------------------------------------------------===//
+// ConvertMemRefCopy
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefCopy final : OpConversionPattern<memref::CopyOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  /// Recreates the copy on adaptor operands; differing layouts are rejected.
+  LogicalResult
+  matchAndRewrite(memref::CopyOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto srcTy = dyn_cast<MemRefType>(op.getSource().getType());
+    auto dstTy = dyn_cast<MemRefType>(op.getTarget().getType());
+    if (srcTy && dstTy && srcTy.getLayout() != dstTy.getLayout())
+      return rewriter.notifyMatchFailure(
+          op, llvm::formatv("memref.copy emulation with distinct layouts ({0} "
+                            "and {1}) is currently unimplemented",
+                            srcTy.getLayout(), dstTy.getLayout()));
+    Value newSource = adaptor.getSource();
+    Value newTarget = adaptor.getTarget();
+    rewriter.replaceOpWithNewOp<memref::CopyOp>(op, newSource, newTarget);
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ConvertMemRefDealloc
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefDealloc final : OpConversionPattern<memref::DeallocOp> {
+  using OpConversionPattern::OpConversionPattern;
+  /// Recreates the dealloc on the memref operand provided by the adaptor.
+  LogicalResult
+  matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<memref::DeallocOp>(op, adaptor.getMemref());
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // ConvertMemRefLoad
 //===----------------------------------------------------------------------===//
@@ -300,6 +340,30 @@ struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> {
   }
 };
 
+//===----------------------------------------------------------------------===//
+// ConvertMemRefMemorySpaceCast
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefMemorySpaceCast final
+    : OpConversionPattern<memref::MemorySpaceCastOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::MemorySpaceCastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // Rebuild the cast only if the destination type can be converted.
+    Type destTy = op.getDest().getType();
+    if (Type newTy = getTypeConverter()->convertType(destTy)) {
+      rewriter.replaceOpWithNewOp<memref::MemorySpaceCastOp>(
+          op, newTy, adaptor.getSource());
+      return success();
+    }
+    return rewriter.notifyMatchFailure(
+        op->getLoc(),
+        llvm::formatv("failed to convert memref type: {0}", destTy));
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // ConvertMemRefReinterpretCast
 //===----------------------------------------------------------------------===//
@@ -490,6 +554,28 @@ struct ConvertMemRefCollapseShape final
   }
 };
 
+/// After narrow type emulation a `memref.expand_shape` has nothing left to do:
+/// memrefs are flattened to a single dimension as part of the emulation, so
+/// the expansion would just have been undone.
+struct ConvertMemRefExpandShape final
+    : OpConversionPattern<memref::ExpandShapeOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ExpandShapeOp expandShapeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // Only a rank-1 memref source can stand in for the expanded value; for
+    // any other source the pattern fails.
+    Value flatSource = adaptor.getSrc();
+    auto flatTy = dyn_cast<MemRefType>(flatSource.getType());
+    bool isFlatMemRef = flatTy && flatTy.getRank() == 1;
+    if (!isFlatMemRef)
+      return failure();
+
+    rewriter.replaceOp(expandShapeOp, flatSource);
+    return success();
+  }
+};
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -502,9 +588,10 @@ void memref::populateMemRefNarrowTypeEmulationPatterns(
 
   // Populate `memref.*` conversion patterns.
   patterns.add<ConvertMemRefAllocation<memref::AllocOp>,
-               ConvertMemRefAllocation<memref::AllocaOp>,
-               ConvertMemRefCollapseShape, ConvertMemRefLoad,
-               ConvertMemrefStore, ConvertMemRefAssumeAlignment,
+               ConvertMemRefAllocation<memref::AllocaOp>, ConvertMemRefCopy,
+               ConvertMemRefDealloc, ConvertMemRefCollapseShape,
+               ConvertMemRefExpandShape, ConvertMemRefLoad, ConvertMemrefStore,
+               ConvertMemRefAssumeAlignment, ConvertMemRefMemorySpaceCast,
                ConvertMemRefSubview, ConvertMemRefReinterpretCast>(
       typeConverter, patterns.getContext());
   memref::populateResolveExtractStridedMetadataPatterns(patterns);
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
index 585c5b73814219..a2049ba4a4924d 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
@@ -726,6 +726,41 @@ struct ExtractStridedMetadataOpCollapseShapeFolder
   }
 };
 
+/// Pattern to fold `extract_strided_metadata(expand_shape)` into the sizes
+/// and strides computed on the expanded shape, with dimensions divided into
+/// static and dynamic parts as needed.
+struct ExtractStridedMetadataOpExpandShapeFolder
+    : OpRewritePattern<memref::ExtractStridedMetadataOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::ExtractStridedMetadataOp op,
+                                PatternRewriter &rewriter) const override {
+    auto expandOp = op.getSource().getDefiningOp<memref::ExpandShapeOp>();
+    if (!expandOp)
+      return failure();
+
+    FailureOr<StridedMetadata> metadata =
+        resolveReshapeStridedMetadata<memref::ExpandShapeOp>(
+            rewriter, expandOp, getExpandedSizes, getExpandedStrides);
+    if (failed(metadata))
+      return rewriter.notifyMatchFailure(
+          op, "failed to resolve metadata in terms of source expand_shape op");
+
+    // Replacement values: base pointer, offset, sizes, then strides.
+    Location loc = expandOp.getLoc();
+    SmallVector<Value> results;
+    results.push_back(metadata->basePtr);
+    results.push_back(
+        getValueOrCreateConstantIndexOp(rewriter, loc, metadata->offset));
+    results.append(
+        getValueOrCreateConstantIndexOp(rewriter, loc, metadata->sizes));
+    results.append(
+        getValueOrCreateConstantIndexOp(rewriter, loc, metadata->strides));
+    rewriter.replaceOp(op, results);
+    return success();
+  }
+};
+
 /// Replace `base, offset, sizes, strides =
 ///              extract_strided_metadata(allocLikeOp)`
 ///
@@ -1060,6 +1095,54 @@ class ExtractStridedMetadataOpCastFolder
   }
 };
 
+/// Replace `base, offset, sizes, strides = extract_strided_metadata(
+///      memory_space_cast(src) to destTy)`
+/// with
+/// ```
+///    oldBase, offset, sizes, strides = extract_strided_metadata(src)
+///    destBaseTy = type(oldBase) with memory space from destTy
+///    base = memory_space_cast(oldBase) to destBaseTy
+/// ```
+///
+/// In other words, propagate metadata extraction across memory space casts.
+class ExtractStridedMetadataOpMemorySpaceCastFolder
+    : public OpRewritePattern<memref::ExtractStridedMetadataOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ExtractStridedMetadataOp extractStridedMetadataOp,
+                  PatternRewriter &rewriter) const override {
+    Location loc = extractStridedMetadataOp.getLoc();
+    Value source = extractStridedMetadataOp.getSource();
+    auto memSpaceCastOp = source.getDefiningOp<memref::MemorySpaceCastOp>();
+    if (!memSpaceCastOp)
+      return failure();
+    auto newExtractStridedMetadata =
+        rewriter.create<memref::ExtractStridedMetadataOp>(
+            loc, memSpaceCastOp.getSource());
+    SmallVector<Value> results(newExtractStridedMetadata.getResults());
+    // As with most other strided metadata rewrite patterns, don't introduce
+    // a use of the base pointer where none existed. This needs to happen here,
+    // as opposed to in later dead-code elimination, because these patterns are
+    // sometimes used during dialect conversion (see EmulateNarrowType, for
+    // example), so adding spurious usages would cause a pre-legalization value
+    // to be live that would be dead had this pattern not run.
+    if (!extractStridedMetadataOp.getBaseBuffer().use_empty()) {
+      auto baseBuffer = results[0];
+      auto baseBufferType = cast<MemRefType>(baseBuffer.getType());
+      MemRefType::Builder newTypeBuilder(baseBufferType);
+      newTypeBuilder.setMemorySpace(
+          memSpaceCastOp.getResult().getType().getMemorySpace());
+      results[0] = rewriter.create<memref::MemorySpaceCastOp>(
+          loc, Type{newTypeBuilder}, baseBuffer);
+    } else {
+      results[0] = nullptr;
+    }
+    rewriter.replaceOp(extractStridedMetadataOp, results);
+    return success();
+  }
+};
+
 /// Replace `base, offset =
 ///            extract_strided_metadata(extract_strided_metadata(src)#0)`
 /// With
@@ -1099,11 +1182,13 @@ void memref::populateExpandStridedMetadataPatterns(
                ExtractStridedMetadataOpAllocFolder<memref::AllocOp>,
                ExtractStridedMetadataOpAllocFolder<memref::AllocaOp>,
                ExtractStridedMetadataOpCollapseShapeFolder,
+               ExtractStridedMetadataOpExpandShapeFolder,
                ExtractStridedMetadataOpGetGlobalFolder,
                RewriteExtractAlignedPointerAsIndexOfViewLikeOp,
                ExtractStridedMetadataOpReinterpretCastFolder,
                ExtractStridedMetadataOpSubviewFolder,
                ExtractStridedMetadataOpCastFolder,
+               ExtractStridedMetadataOpMemorySpaceCastFolder,
                ExtractStridedMetadataOpExtractStridedMetadataFolder>(
       patterns.getContext());
 }
@@ -1113,11 +1198,13 @@ void memref::populateResolveExtractStridedMetadataPatterns(
   patterns.add<ExtractStridedMetadataOpAllocFolder<memref::AllocOp>,
                ExtractStridedMetadataOpAllocFolder<memref::AllocaOp>,
                ExtractStridedMetadataOpCollapseShapeFolder,
+               ExtractStridedMetadataOpExpandShapeFolder,
                ExtractStridedMetadataOpGetGlobalFolder,
                ExtractStridedMetadataOpSubviewFolder,
                RewriteExtractAlignedPointerAsIndexOfViewLikeOp,
                ExtractStridedMetadataOpReinterpretCastFolder,
                ExtractStridedMetadataOpCastFolder,
+               ExtractStridedMetadataOpMemorySpaceCastFolder,
                ExtractStridedMetadataOpExtractStridedMetadataFolder>(
       patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 39ea7a5b61f5ec..d4e49b6e3c044c 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -864,6 +864,29 @@ LogicalResult tosa::TableOp::inferReturnTypeComponents(
   return success();
 }
 
+LogicalResult tosa::TableOp::verify() {
+  TensorType inTy = getInput().getType();
+  TensorType resTy = getOutput().getType();
+
+  // Ranks can only be compared when both the input and the result are ranked.
+  bool bothRanked = inTy.hasRank() && resTy.hasRank();
+  if (bothRanked && inTy.getRank() != resTy.getRank())
+    return emitOpError()
+           << "expected input tensor rank to equal result tensor rank";
+
+  // Each static result dimension has to equal the matching input dimension.
+  int64_t dim = 0;
+  for (auto [inputDim, outputDim] :
+       llvm::zip(inTy.getShape(), resTy.getShape())) {
+    if (!ShapedType::isDynamic(outputDim) && outputDim != inputDim)
+      return emitOpError() << "dim(result, " << dim << ") = " << outputDim
+                           << " doesn't match dim(input, " << dim
+                           << ") = " << inputDim;
+    ++dim;
+  }
+  return success();
+}
+
 LogicalResult tosa::TileOp::inferReturnTypeComponents(
     MLIRContext *context, ::std::optional<Location> location,
     TileOp::Adaptor adaptor,
diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
index 7233a8bfffa9db..47be1be30577d8 100644
--- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
+++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 module attributes {
   spirv.target_env = #spirv.target_env<
diff --git a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
index 0b8b60e963bb01..a91b991c5ed2b9 100644
--- a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
@@ -296,6 +296,60 @@ llvm.func @bar(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
 
 llvm.func @random() -> i1
 
+llvm.func @region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
+  %0 = llvm.mlir.constant(5 : i64) : i32
+  test.region_if %arg0: !llvm.ptr -> !llvm.ptr then {
+  ^bb0(%arg2: !llvm.ptr):
+    test.region_if_yield %arg0 : !llvm.ptr
+  } else {
+  ^bb0(%arg2: !llvm.ptr):
+    test.region_if_yield %arg0 : !llvm.ptr
+  } join {
+  ^bb0(%arg2: !llvm.ptr):
+    llvm.store %0, %arg2 : i32, !llvm.ptr
+    test.region_if_yield %arg0 : !llvm.ptr
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @region_branch_inlining
+// CHECK: llvm.store
+// CHECK-SAME: alias_scopes = [#[[$ARG0_SCOPE]]]
+// CHECK-SAME: noalias_scopes = [#[[$ARG1_SCOPE]]]
+llvm.func @region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+  llvm.call @region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+llvm.func @missing_region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
+  %0 = llvm.mlir.constant(5 : i64) : i32
+  "test.one_region_op"() ({
+  ^bb0(%arg2: !llvm.ptr):
+    llvm.store %0, %arg2 : i32, !llvm.ptr
+    "test.terminator"() : () -> ()
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @missing_region_branch_inlining
+// CHECK: llvm.store
+// CHECK-NOT: alias_scopes
+// CHECK-NOT: noalias_scopes
+llvm.func @missing_region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+  llvm.call @missing_region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+// CHECK-DAG: #[[DOMAIN:.*]] = #llvm.alias_scope_domain<{{.*}}>
+// CHECK-DAG: #[[$ARG0_SCOPE:.*]] = #llvm.alias_scope<id = {{.*}}, domain = #[[DOMAIN]]{{(,.*)?}}>
+// CHECK-DAG: #[[$ARG1_SCOPE:.*]] = #llvm.alias_scope<id = {{.*}}, domain = #[[DOMAIN]]{{(,.*)?}}>
+
+llvm.func @random() -> i1
+
 llvm.func @block_arg(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
   %0 = llvm.mlir.constant(5 : i64) : i32
   %1 = llvm.mlir.constant(1 : i64) : i64
diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
index a67237b5e4dd19..540da239fced08 100644
--- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
@@ -6,11 +6,13 @@ func.func @memref_i8() -> i8 {
     %c3 = arith.constant 3 : index
     %m = memref.alloc() : memref<4xi8, 1>
     %v = memref.load %m[%c3] : memref<4xi8, 1>
+    memref.dealloc %m : memref<4xi8, 1>
     return %v : i8
 }
 // CHECK-LABEL: func @memref_i8()
 //       CHECK:   %[[M:.+]] = memref.alloc() : memref<4xi8, 1>
 //  CHECK-NEXT:   %[[V:.+]] = memref.load %[[M]][%{{.+}}] : memref<4xi8, 1>
+//  CHECK-NEXT:   memref.dealloc %[[M]]
 //  CHECK-NEXT:   return %[[V]]
 
 // CHECK32-LABEL: func @memref_i8()
@@ -21,6 +23,7 @@ func.func @memref_i8() -> i8 {
 //       CHECK32:   %[[CAST:.+]] = arith.index_cast %[[C24]] : index to i32
 //       CHECK32:   %[[SHIFTRT:.+]] = arith.shrsi %[[V]], %[[CAST]]
 //       CHECK32:   %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i8
+//  CHECK32-NEXT:   memref.dealloc %[[M]]
 //  CHECK32-NEXT:   return %[[TRUNC]]
 
 // -----
@@ -485,3 +488,68 @@ func.func @memref_collapse_shape_i4(%idx0 : index, %idx1 : index) -> i4 {
 //   CHECK32-NOT:     memref.collapse_shape
 //       CHECK32:     memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32>
 
+// -----
+
+func.func @memref_expand_shape_i4(%idx0 : index, %idx1 : index, %idx2 : index) -> i4 {
+  %arr = memref.alloc() : memref<256x128xi4>
+  %expand = memref.expand_shape %arr[[0, 1], [2]] output_shape [32, 8, 128] : memref<256x128xi4> into memref<32x8x128xi4>
+  %1 = memref.load %expand[%idx0, %idx1, %idx2] : memref<32x8x128xi4>
+  return %1 : i4
+}
+
+// CHECK-LABEL:   func.func @memref_expand_shape_i4(
+//       CHECK:     %[[ALLOC:.*]] = memref.alloc() : memref<16384xi8>
+//   CHECK-NOT:     memref.expand_shape
+//       CHECK:     memref.load %[[ALLOC]][%{{.*}}] : memref<16384xi8>
+
+// CHECK32-LABEL:   func.func @memref_expand_shape_i4(
+//       CHECK32:     %[[ALLOC:.*]] = memref.alloc() : memref<4096xi32>
+//   CHECK32-NOT:     memref.expand_shape
+//       CHECK32:     memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32>
+
+// -----
+
+func.func @memref_memory_space_cast_i4(%arg0: memref<32x128xi4, 1>) -> memref<32x128xi4> {
+  %cast = memref.memory_space_cast %arg0 : memref<32x128xi4, 1> to memref<32x128xi4>
+  return %cast : memref<32x128xi4>
+}
+
+// CHECK-LABEL:   func.func @memref_memory_space_cast_i4(
+//  CHECK-SAME:   %[[ARG0:.*]]: memref<2048xi8, 1>
+//       CHECK:     %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<2048xi8, 1> to memref<2048xi8>
+//       CHECK:     return %[[CAST]]
+
+// CHECK32-LABEL:   func.func @memref_memory_space_cast_i4(
+//  CHECK32-SAME:   %[[ARG0:.*]]: memref<512xi32, 1>
+//       CHECK32:     %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<512xi32, 1> to memref<512xi32>
+//       CHECK32:     return %[[CAST]]
+
+// -----
+
+func.func @memref_copy_i4(%arg0: memref<32x128xi4, 1>, %arg1: memref<32x128xi4>) {
+  memref.copy %arg0, %arg1 : memref<32x128xi4, 1> to memref<32x128xi4>
+  return
+}
+
+// CHECK-LABEL:   func.func @memref_copy_i4(
+//  CHECK-SAME:   %[[ARG0:.*]]: memref<2048xi8, 1>, %[[ARG1:.*]]: memref<2048xi8>
+//       CHECK:     memref.copy %[[ARG0]], %[[ARG1]]
+//       CHECK:     return
+
+// CHECK32-LABEL:   func.func @memref_copy_i4(
+//  CHECK32-SAME:   %[[ARG0:.*]]: memref<512xi32, 1>, %[[ARG1:.*]]: memref<512xi32>
+//       CHECK32:     memref.copy %[[ARG0]], %[[ARG1]]
+//       CHECK32:     return
+
+// -----
+
+!colMajor = memref<8x8xi4, strided<[1, 8]>>
+func.func @copy_distinct_layouts(%idx : index) -> i4 {
+  %c0 = arith.constant 0 : index
+  %arr = memref.alloc() : memref<8x8xi4>
+  %arr2 = memref.alloc() : !colMajor
+  // expected-error @+1 {{failed to legalize operation 'memref.copy' that was explicitly marked illegal}}
+  memref.copy %arr, %arr2 : memref<8x8xi4> to !colMajor
+  %ld = memref.load %arr2[%c0, %c0] : !colMajor
+  return %ld : i4
+}
diff --git a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
index d884ade3195329..8aac802ba10ae9 100644
--- a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
+++ b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
@@ -1553,3 +1553,41 @@ func.func @extract_strided_metadata_of_collapse_shape(%base: memref<5x4xf32>)
 //   CHECK-DAG:    %[[STEP:.*]] = arith.constant 1 : index
 //       CHECK:    %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata
 //       CHECK:    return %[[BASE]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref<f32>, index, index, index
+
+// -----
+
+func.func @extract_strided_metadata_of_memory_space_cast(%base: memref<20xf32>)
+    -> (memref<f32, 1>, index, index, index) {
+
+  %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1>
+
+  %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast :
+    memref<20xf32, 1> -> memref<f32, 1>, index, index, index
+
+  return %base_buffer, %offset, %size, %stride :
+    memref<f32, 1>, index, index, index
+}
+
+// CHECK-LABEL:  func @extract_strided_metadata_of_memory_space_cast
+//   CHECK-DAG:    %[[OFFSET:.*]] = arith.constant 0 : index
+//   CHECK-DAG:    %[[SIZE:.*]] = arith.constant 20 : index
+//   CHECK-DAG:    %[[STEP:.*]] = arith.constant 1 : index
+//       CHECK:    %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata
+//       CHECK:    %[[CAST:.*]] = memref.memory_space_cast %[[BASE]]
+//       CHECK:    return %[[CAST]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref<f32, 1>, index, index, index
+
+// -----
+
+func.func @extract_strided_metadata_of_memory_space_cast_no_base(%base: memref<20xf32>)
+    -> (index, index, index) {
+
+  %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1>
+
+  %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast :
+    memref<20xf32, 1> -> memref<f32, 1>, index, index, index
+
+  return %offset, %size, %stride : index, index, index
+}
+
+// CHECK-LABEL:  func @extract_strided_metadata_of_memory_space_cast_no_base
+//   CHECK-NOT:  memref.memory_space_cast
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index e1fcf056480083..e723aef3815ce6 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -448,3 +448,30 @@ func.func @test_large_constant_permutation() {
   %3 = tosa.transpose %2, %1 : (tensor<?x27xi64>, tensor<2xi32>) -> tensor<?x27xi64>
   return
 }
+
+// -----
+
+// CHECK-LABEL: test_table_rank0_table
+func.func @test_table_rank0_table(%arg0: tensor<64xi16>, %arg1: tensor<i16>) {
+  // expected-error@+1 {{'tosa.table' op operand #1 must be 1-d tensor, but got 'tensor<i16>'}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor<i16>) -> tensor<64xi16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: test_table_io_rank_mismatch
+func.func @test_table_io_rank_mismatch(%arg0: tensor<64xi16>, %arg1: tensor<6xi16>) {
+  // expected-error@+1 {{'tosa.table' op expected input tensor rank to equal result tensor rank}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor<6xi16>) -> tensor<64x?xi16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: test_table_io_shape_mismatch
+func.func @test_table_io_shape_mismatch(%arg0: tensor<?x16xi16>, %arg1: tensor<6xi16>) {
+  // expected-error@+1 {{'tosa.table' op dim(result, 1) = 15 doesn't match dim(input, 1) = 16}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<?x16xi16>, tensor<6xi16>) -> tensor<?x15xi16>
+  return
+}
diff --git a/mlir/test/IR/elements-attr-interface.mlir b/mlir/test/IR/elements-attr-interface.mlir
index 5234c81bd841e3..79283f1aae99a8 100644
--- a/mlir/test/IR/elements-attr-interface.mlir
+++ b/mlir/test/IR/elements-attr-interface.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -test-elements-attr-interface -verify-diagnostics
 
 // Parsing external resources does not work on big-endian platforms currently
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 // This test contains various `ElementsAttr` attributes, and tests the support
 // for iterating the values of these attributes using various native C++ types.
diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
index f8d082082117cb..98145bc35cba77 100644
--- a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
 
 // Decoding the attribute does not work on big-endian platforms currently
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 // CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000]
 llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource<dense_resource_test_5xf32> : tensor<5xf32>) : !llvm.array<5 x f32>
@@ -24,4 +24,4 @@ llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dens
       dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E"
     }
   }
-#-}
\ No newline at end of file
+#-}
diff --git a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
index b9e0a2e153ac1e..13f6f815d39950 100644
--- a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
@@ -33,5 +33,8 @@ expand_template(
             "//llvm:not",
         ] + glob(["Inputs/**"]),
     )
-    for src in glob(["*/*.py"])
+    for src in glob(
+        ["*/*.py"],
+        exclude = ["Inputs/**"],
+    )
 ]
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index ac9e311ad5110e..7273429d4f344f 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5425,8 +5425,9 @@ cc_library(
     includes = ["include"],
     deps = [
 	":DataLayoutInterfaces",
+        ":Analysis",
         ":FuncDialect",
-	":InliningUtils",
+	    ":InliningUtils",
         ":IR",
         ":LLVMDialect",
         ":LLVMPassIncGen",