From 3265dfe3e620d526ca15dcecaa1c68e63ceaba45 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 14 Aug 2024 14:22:36 -0700
Subject: [PATCH 01/47] [RISCV] Add signext attribute to return of fmv_x_w test
 in float-convert.ll. NFC

This shows that Zfinx generates a sext.w instruction on RV64.
The fadd.s should have filled the upper bits of the GPR with sign
bits so this is unnecessary. Proving it is unnecessary might be
difficult though.
---
 llvm/test/CodeGen/RISCV/float-convert.ll | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 21bf6618c52a26..805ddee4ac3f6f 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -336,17 +336,23 @@ start:
 }
 declare i32 @llvm.fptoui.sat.i32.f32(float)
 
-define i32 @fmv_x_w(float %a, float %b) nounwind {
+define signext i32 @fmv_x_w(float %a, float %b) nounwind {
 ; CHECKIF-LABEL: fmv_x_w:
 ; CHECKIF:       # %bb.0:
 ; CHECKIF-NEXT:    fadd.s fa5, fa0, fa1
 ; CHECKIF-NEXT:    fmv.x.w a0, fa5
 ; CHECKIF-NEXT:    ret
 ;
-; CHECKIZFINX-LABEL: fmv_x_w:
-; CHECKIZFINX:       # %bb.0:
-; CHECKIZFINX-NEXT:    fadd.s a0, a0, a1
-; CHECKIZFINX-NEXT:    ret
+; RV32IZFINX-LABEL: fmv_x_w:
+; RV32IZFINX:       # %bb.0:
+; RV32IZFINX-NEXT:    fadd.s a0, a0, a1
+; RV32IZFINX-NEXT:    ret
+;
+; RV64IZFINX-LABEL: fmv_x_w:
+; RV64IZFINX:       # %bb.0:
+; RV64IZFINX-NEXT:    fadd.s a0, a0, a1
+; RV64IZFINX-NEXT:    sext.w a0, a0
+; RV64IZFINX-NEXT:    ret
 ;
 ; RV32I-LABEL: fmv_x_w:
 ; RV32I:       # %bb.0:
@@ -362,6 +368,7 @@ define i32 @fmv_x_w(float %a, float %b) nounwind {
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    call __addsf3
+; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret

From 4bac8fd8904904bc7d502f39851eef50b5afff73 Mon Sep 17 00:00:00 2001
From: Connie <60797237+connieyzhu@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:45:02 -0700
Subject: [PATCH 02/47] [llvm-lit][test][NFC] Moved cat command tests into
 separate lit test file (#102366)

This patch separates the lit tests that check the functionality of
lit's built-in cat command into their own test file and folder. This is a
prerequisite for https://github.com/llvm/llvm-project/pull/101530.
---
 .../Inputs/{shtest-shell => }/check_path.py   |   0
 .../cat-error-0.txt                           |   0
 .../cat-error-1.txt                           |   0
 .../utils/lit/tests/Inputs/shtest-cat/cat.txt |  83 ++++++++++++++
 .../cat_nonprinting.bin                       | Bin
 .../utils/lit/tests/Inputs/shtest-cat/lit.cfg |   8 ++
 .../tests/Inputs/shtest-shell/valid-shell.txt | 108 ++----------------
 llvm/utils/lit/tests/shtest-cat.py            |  23 ++++
 llvm/utils/lit/tests/shtest-shell.py          |  18 +--
 9 files changed, 127 insertions(+), 113 deletions(-)
 rename llvm/utils/lit/tests/Inputs/{shtest-shell => }/check_path.py (100%)
 rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat-error-0.txt (100%)
 rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat-error-1.txt (100%)
 create mode 100644 llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
 rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat_nonprinting.bin (100%)
 create mode 100644 llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg
 create mode 100644 llvm/utils/lit/tests/shtest-cat.py

diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py b/llvm/utils/lit/tests/Inputs/check_path.py
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py
rename to llvm/utils/lit/tests/Inputs/check_path.py
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
new file mode 100644
index 00000000000000..7375a7497e5bec
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt
@@ -0,0 +1,83 @@
+## Test cat command with a single file.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp.write
+# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write
+# RUN: %{python} %S/../check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s
+# FILE-EXISTS: True
+# CAT-OUTPUT: abcdefgh
+#
+## Test cat command with multiple files.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: echo "mnopqrst" > %T/testCat/temp3.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write
+# RUN: %{python} %S/../check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out
+# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s
+# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s
+# MULTI-FILE-EXISTS: True
+# MULTI-CAT-OUTPUT: abcdefgh
+# MULTI-CAT-OUTPUT-NEXT: efghijkl
+# MULTI-CAT-OUTPUT-NEXT: mnopqrst
+#
+## Test cat command with multiple files and piped output to FileCheck.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "abcdefgh" > %T/testCat/temp1.write
+# RUN: echo "efghijkl" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s
+# PIPED-CAT-OUTPUT: abcdefgh
+# PIPED-CAT-OUTPUT-NEXT: efghijkl
+#
+## Test cat command with multiple files and glob expressions.
+#
+# RUN: rm -rf %T/testCat
+# RUN: mkdir -p %T/testCat
+# RUN: echo "cvbnm" > %T/testCat/temp1.write
+# RUN: echo "qwerty" > %T/testCat/temp2.write
+# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s
+# GLOB-CAT-OUTPUT: cvbnm
+# GLOB-CAT-OUTPUT-NEXT: qwerty
+#
+## Test cat command with -v option
+#
+# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s
+# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
+#
+## Test cat command with --show-nonprinting option
+#
+# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s
+# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
+# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
+# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
+# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
+# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
+# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
+# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
+# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
+# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
+# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
+# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
+# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
+# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
+# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin b/llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin
similarity index 100%
rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin
rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin
diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg
new file mode 100644
index 00000000000000..8f197946e28b5c
--- /dev/null
+++ b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg
@@ -0,0 +1,8 @@
+import lit.formats
+
+config.name = "shtest-cat"
+config.suffixes = [".txt"]
+config.test_format = lit.formats.ShTest()
+config.test_source_root = None
+config.test_exec_root = None
+config.substitutions.append(("%{python}", '"%s"' % (sys.executable)))
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
index 7267b9b9ef5aba..75ce8b7733ad7d 100644
--- a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
+++ b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt
@@ -2,13 +2,13 @@
 # Check force remove commands success whether the file does or doesn't exist.
 #
 # RUN: rm -f %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s
 # RUN: echo "create a temp file" > %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=FILE-EXIST < %t.out %s
 # RUN: rm -f %t.write
-# RUN: %{python} %S/check_path.py file %t.write > %t.out
+# RUN: %{python} %S/../check_path.py file %t.write > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s
 #
 # REMOVE-FILE: False
@@ -19,14 +19,14 @@
 #
 # Check the mkdir command with -p option.
 # RUN: rm -f -r %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s
 # RUN: mkdir -p %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=MAKE-PARENT-DIR < %t.out %s
 # RUN: rm -f %T/test || true
 # RUN: rm -f -r %T/test
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s
 #
 # MAKE-PARENT-DIR: True
@@ -36,13 +36,13 @@
 #
 # RUN: rm -rf %T/test1
 # RUN: mkdir %T/test1
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s
 # RUN: cd %T/test1 && mkdir foo
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s
 # RUN: cd %T && rm -rf %T/test1
-# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-DIR < %t.out %s
 #
 # MAKE-DIR: True
@@ -52,16 +52,16 @@
 #
 # RUN: rm -rf %T/test
 # RUN: mkdir -p %T/test/test1 %T/test/test2
-# RUN: %{python} %S/check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out
 # RUN: FileCheck --check-prefix=DIRS-EXIST < %t.out %s
 # RUN: mkdir %T/test || true
 # RUN: echo "create a temp file" > %T/test/temp.write
 # RUN: echo "create a temp1 file" > %T/test/test1/temp1.write
 # RUN: echo "create a temp2 file" > %T/test/test2/temp2.write
-# RUN: %{python} %S/check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out
+# RUN: %{python} %S/../check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out
 # RUN: FileCheck --check-prefix=FILES-EXIST < %t.out %s
 # RUN: rm -r -f %T/*
-# RUN: %{python} %S/check_path.py dir %T/test > %t.out
+# RUN: %{python} %S/../check_path.py dir %T/test > %t.out
 # RUN: FileCheck --check-prefix=REMOVE-ALL < %t.out %s
 #
 # DIRS-EXIST: True
@@ -85,87 +85,3 @@
 # RUN: cd %T/dir1 && echo "hello" > temp1.txt
 # RUN: cd %T/dir2 && echo "hello" > temp2.txt
 # RUN: diff temp2.txt ../dir1/temp1.txt
-#
-# Check cat command with single file.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp.write
-# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write
-# RUN: %{python} %S/check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out
-# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s
-# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s
-# FILE-EXISTS: True
-# CAT-OUTPUT: abcdefgh
-#
-# Check cat command with multiple files.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp1.write
-# RUN: echo "efghijkl" > %T/testCat/temp2.write
-# RUN: echo "mnopqrst" > %T/testCat/temp3.write
-# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write
-# RUN: %{python} %S/check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out
-# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s
-# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s
-# MULTI-FILE-EXISTS: True
-# MULTI-CAT-OUTPUT: abcdefgh
-# MULTI-CAT-OUTPUT-NEXT: efghijkl
-# MULTI-CAT-OUTPUT-NEXT: mnopqrst
-#
-# Check cat command with multiple files and piped output to FileCheck.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "abcdefgh" > %T/testCat/temp1.write
-# RUN: echo "efghijkl" > %T/testCat/temp2.write
-# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s
-# PIPED-CAT-OUTPUT: abcdefgh
-# PIPED-CAT-OUTPUT-NEXT: efghijkl
-#
-# Check cat command with multiple files and glob expressions.
-#
-# RUN: rm -rf %T/testCat
-# RUN: mkdir -p %T/testCat
-# RUN: echo "cvbnm" > %T/testCat/temp1.write
-# RUN: echo "qwerty" > %T/testCat/temp2.write
-# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s
-# GLOB-CAT-OUTPUT: cvbnm
-# GLOB-CAT-OUTPUT-NEXT: qwerty
-#
-# Check cat command with -v option
-#
-# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s
-# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
-# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
-# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
-# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
-# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
-# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
-# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
-# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
-# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
-# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
-# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
-# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
-# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
-# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
-#
-# Check cat command with -show-nonprinting option
-#
-# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s
-# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G	^H
-# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S
-# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&'
-# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:;
-# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO
-# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc
-# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw
-# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K
-# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_
-# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3
-# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G
-# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[
-# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o
-# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^?
diff --git a/llvm/utils/lit/tests/shtest-cat.py b/llvm/utils/lit/tests/shtest-cat.py
new file mode 100644
index 00000000000000..5efe25c41684a1
--- /dev/null
+++ b/llvm/utils/lit/tests/shtest-cat.py
@@ -0,0 +1,23 @@
+## Test the cat command.
+#
+# RUN: not %{lit} -a -v %{inputs}/shtest-cat \
+# RUN: | FileCheck -match-full-lines %s
+# END.
+
+# CHECK: FAIL: shtest-cat :: cat-error-0.txt ({{[^)]*}})
+# CHECK: cat -b temp1.txt
+# CHECK: # .---command stderr{{-*}}
+# CHECK-NEXT: # | Unsupported: 'cat':  option -b not recognized
+# CHECK: # error: command failed with exit status: 1
+
+# CHECK: FAIL: shtest-cat :: cat-error-1.txt ({{[^)]*}})
+# CHECK: cat temp1.txt
+# CHECK: # .---command stderr{{-*}}
+# CHECK-NEXT: # | [Errno 2] No such file or directory: 'temp1.txt'
+# CHECK: # error: command failed with exit status: 1
+
+# CHECK: PASS: shtest-cat :: cat.txt ({{[^)]*}})
+
+# CHECK: Total Discovered Tests: 3
+# CHECK-NEXT: Passed: 1 {{\([0-9]*\.[0-9]*%\)}}
+# CHECK-NEXT: Failed: 2 {{\([0-9]*\.[0-9]*%\)}}
diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py
index 86851194880620..8f2b865f333a57 100644
--- a/llvm/utils/lit/tests/shtest-shell.py
+++ b/llvm/utils/lit/tests/shtest-shell.py
@@ -18,22 +18,6 @@
 
 # CHECK: -- Testing:
 
-# CHECK: FAIL: shtest-shell :: cat-error-0.txt
-# CHECK: *** TEST 'shtest-shell :: cat-error-0.txt' FAILED ***
-# CHECK: cat -b temp1.txt
-# CHECK: # .---command stderr{{-*}}
-# CHECK: # | Unsupported: 'cat':  option -b not recognized
-# CHECK: # error: command failed with exit status: 1
-# CHECK: ***
-
-# CHECK: FAIL: shtest-shell :: cat-error-1.txt
-# CHECK: *** TEST 'shtest-shell :: cat-error-1.txt' FAILED ***
-# CHECK: cat temp1.txt
-# CHECK: # .---command stderr{{-*}}
-# CHECK: # | [Errno 2] No such file or directory: 'temp1.txt'
-# CHECK: # error: command failed with exit status: 1
-# CHECK: ***
-
 # CHECK: FAIL: shtest-shell :: colon-error.txt
 # CHECK: *** TEST 'shtest-shell :: colon-error.txt' FAILED ***
 # CHECK: :
@@ -651,4 +635,4 @@
 
 # CHECK: PASS: shtest-shell :: valid-shell.txt
 # CHECK: Unresolved Tests (1)
-# CHECK: Failed Tests (38)
+# CHECK: Failed Tests (36)

From e9b7983fc6826eceb819a3cdb0301c401847ade4 Mon Sep 17 00:00:00 2001
From: Harini0924 <79345568+Harini0924@users.noreply.github.com>
Date: Wed, 14 Aug 2024 14:49:19 -0700
Subject: [PATCH 03/47] [llvm-lit] Fix Unhashable TypeError when using lit's
 internal shell (#101590)

When using the lit internal shell with the command:
```
LIT_USE_INTERNAL_SHELL=1 ninja check-compiler-rt
```
The following error is encountered:
```
File "TestRunner.py", line 770, in _executeShCmd
    inproc_builtin = inproc_builtins.get(args[0], None)
                     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: unhashable type: 'GlobItem'
```
This error is in a compiler-rt file:
```
TestCases/Linux/long-object-path.cpp
```
This error occurs because `args[0]` is of type `GlobItem`, which is not
hashable, leading to a `TypeError` when it is passed to
`inproc_builtins.get()`. To resolve this issue, `args[0]` is now expanded
with `expand_glob` (taking the first result) so that it is hashable before
it is used as the lookup key in `inproc_builtins`.
fixes: #102389
[link to
RFC](https://discourse.llvm.org/t/rfc-enabling-the-lit-internal-shell-by-default/80179)
---
 llvm/utils/lit/lit/TestRunner.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py
index da7fa86fd39173..cc903f9e3a1520 100644
--- a/llvm/utils/lit/lit/TestRunner.py
+++ b/llvm/utils/lit/lit/TestRunner.py
@@ -767,6 +767,10 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper):
         # echo-appending to a file.
         # FIXME: Standardize on the builtin echo implementation. We can use a
         # temporary file to sidestep blocking pipe write issues.
+
+        # Ensure args[0] is hashable.
+        args[0] = expand_glob(args[0], cmd_shenv.cwd)[0]
+
         inproc_builtin = inproc_builtins.get(args[0], None)
         if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1):
             # env calling an in-process builtin is useless, so we take the safe

From a88f3a331137d6379f2f1189d5eb4b086c686ab4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn@outlook.com>
Date: Wed, 14 Aug 2024 16:54:38 -0500
Subject: [PATCH 04/47] [Clang] Add `__CLANG_GPU_DISABLE_MATH_WRAPPERS` macro
 for offloading math (#98234)

Summary:
Currently we replace all math calls with vendor specific ones. This
patch introduces a macro `__CLANG_GPU_DISABLE_MATH_WRAPPERS` that when
defined will disable this.

I went this route instead of a flag for two reasons. First, I think we
have too many flags as is, and we already have `-nogpuinc` to cover
disabling these wrappers entirely, so this would be a really specific
subset of that. Second, these math headers aren't easily decoupled by
simply not including a single header from the clang driver layer: doing
so would also disable the cmath and the regular math forward
declarations.

Note: this currently causes errors because the GPU `libm` doesn't have
`powi`; that's an NVIDIA extension I'll add to LLVM libm.
---
 clang/lib/Headers/__clang_cuda_math.h    |  5 +++
 clang/lib/Headers/__clang_hip_math.h     |  5 +++
 clang/test/Headers/gpu_disabled_math.cpp | 41 ++++++++++++++++++++++++
 3 files changed, 51 insertions(+)
 create mode 100644 clang/test/Headers/gpu_disabled_math.cpp

diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h
index 04019165068668..44c6e9a4e48d1b 100644
--- a/clang/lib/Headers/__clang_cuda_math.h
+++ b/clang/lib/Headers/__clang_cuda_math.h
@@ -12,6 +12,10 @@
 #error "This file is for CUDA compilation only."
 #endif
 
+// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
+// libcalls reach the link step instead of being eagerly replaced.
+#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
+
 #ifndef __OPENMP_NVPTX__
 #if CUDA_VERSION < 9000
 #error This file is intended to be used with CUDA-9+ only.
@@ -345,4 +349,5 @@ __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); }
 #pragma pop_macro("__DEVICE_VOID__")
 #pragma pop_macro("__FAST_OR_SLOW")
 
+#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
 #endif // __CLANG_CUDA_MATH_H__
diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h
index 11e1e7d032586f..8468751d9de260 100644
--- a/clang/lib/Headers/__clang_hip_math.h
+++ b/clang/lib/Headers/__clang_hip_math.h
@@ -13,6 +13,10 @@
 #error "This file is for HIP and OpenMP AMDGCN device compilation only."
 #endif
 
+// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard
+// libcalls reach the link step instead of being eagerly replaced.
+#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS
+
 #if !defined(__HIPCC_RTC__)
 #include <limits.h>
 #include <stdint.h>
@@ -1321,4 +1325,5 @@ __host__ inline static int max(int __arg1, int __arg2) {
 #pragma pop_macro("__RETURN_TYPE")
 #pragma pop_macro("__FAST_OR_SLOW")
 
+#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS
 #endif // __CLANG_HIP_MATH_H__
diff --git a/clang/test/Headers/gpu_disabled_math.cpp b/clang/test/Headers/gpu_disabled_math.cpp
new file mode 100644
index 00000000000000..6e697f52120aeb
--- /dev/null
+++ b/clang/test/Headers/gpu_disabled_math.cpp
@@ -0,0 +1,41 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h      \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=AMDGPU %s
+
+// RUN: %clang_cc1 -include __clang_cuda_runtime_wrapper.h      \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu sm_90 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=NVPTX %s
+
+extern "C" double sin(double x);
+
+// AMDGPU-LABEL: define dso_local noundef double @_Z3food(
+// AMDGPU-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// AMDGPU-NEXT:  [[ENTRY:.*:]]
+// AMDGPU-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5)
+// AMDGPU-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// AMDGPU-NEXT:    [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr
+// AMDGPU-NEXT:    store double [[X]], ptr [[X_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8
+// AMDGPU-NEXT:    [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]])
+// AMDGPU-NEXT:    ret double [[TMP1]]
+//
+// NVPTX-LABEL: define dso_local noundef double @_Z3food(
+// NVPTX-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// NVPTX-NEXT:  [[ENTRY:.*:]]
+// NVPTX-NEXT:    [[X_ADDR:%.*]] = alloca double, align 8
+// NVPTX-NEXT:    store double [[X]], ptr [[X_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP0:%.*]] = load double, ptr [[X_ADDR]], align 8
+// NVPTX-NEXT:    [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]])
+// NVPTX-NEXT:    ret double [[TMP1]]
+//
+double foo(double x) {
+  return sin(x);
+}

From 743e99dcf5146dd4e2c20d20800e91595da47be9 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Tue, 13 Aug 2024 15:14:11 -0700
Subject: [PATCH 05/47] Reland "[flang][cuda] Use cuda runtime API #103488"

CUDA Fortran is meant to be equivalent to the CUDA runtime API. Therefore,
it makes more sense to use the CUDA runtime API in the allocators for CUF.
---
 flang/include/flang/Runtime/CUDA/allocator.h  |  7 ++--
 flang/runtime/CUDA/CMakeLists.txt             | 10 ++++--
 flang/runtime/CUDA/allocator.cpp              | 24 ++++++--------
 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 33 +------------------
 4 files changed, 22 insertions(+), 52 deletions(-)

diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index f0bfc1548e6458..4527c9f18fa054 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -13,11 +13,10 @@
 #include "flang/Runtime/entry-names.h"
 
 #define CUDA_REPORT_IF_ERROR(expr) \
-  [](CUresult result) { \
-    if (!result) \
+  [](cudaError_t err) { \
+    if (err == cudaSuccess) \
       return; \
-    const char *name = nullptr; \
-    cuGetErrorName(result, &name); \
+    const char *name = cudaGetErrorName(err); \
     if (!name) \
       name = "<unknown>"; \
     Terminator terminator{__FILE__, __LINE__}; \
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
index 88243536139e46..c9a20ebcc82e07 100644
--- a/flang/runtime/CUDA/CMakeLists.txt
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -7,14 +7,20 @@
 #===------------------------------------------------------------------------===#
 
 include_directories(${CUDAToolkit_INCLUDE_DIRS})
-find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
 
 add_flang_library(CufRuntime
   allocator.cpp
   descriptor.cpp
 )
+
+if (BUILD_SHARED_LIBS)
+  set(CUDA_RT_TARGET CUDA::cudart)
+else()
+  set(CUDA_RT_TARGET CUDA::cudart_static)
+endif()
+
 target_link_libraries(CufRuntime
   PRIVATE
   FortranRuntime
-  ${CUDA_RUNTIME_LIBRARY}
+  ${CUDA_RT_TARGET}
 )
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index bd657b800c61e8..d4a473d58e86cd 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -15,7 +15,7 @@
 #include "flang/ISO_Fortran_binding_wrapper.h"
 #include "flang/Runtime/allocator-registry.h"
 
-#include "cuda.h"
+#include "cuda_runtime.h"
 
 namespace Fortran::runtime::cuda {
 extern "C" {
@@ -34,32 +34,28 @@ void RTDEF(CUFRegisterAllocator)() {
 
 void *CUFAllocPinned(std::size_t sizeInBytes) {
   void *p;
-  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+  CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes));
   return p;
 }
 
-void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); }
 
 void *CUFAllocDevice(std::size_t sizeInBytes) {
-  CUdeviceptr p = 0;
-  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
-  return reinterpret_cast<void *>(p);
+  void *p;
+  CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes));
+  return p;
 }
 
-void CUFFreeDevice(void *p) {
-  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
-}
+void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
 
 void *CUFAllocManaged(std::size_t sizeInBytes) {
-  CUdeviceptr p = 0;
+  void *p;
   CUDA_REPORT_IF_ERROR(
-      cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
+      cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal));
   return reinterpret_cast<void *>(p);
 }
 
-void CUFFreeManaged(void *p) {
-  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
-}
+void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); }
 
 void *CUFAllocUnified(std::size_t sizeInBytes) {
   // Call alloc managed for the time being.
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 9f5ec289ee8f74..b51ff0ac006cc6 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -14,7 +14,7 @@
 #include "flang/Runtime/allocatable.h"
 #include "flang/Runtime/allocator-registry.h"
 
-#include "cuda.h"
+#include "cuda_runtime.h"
 
 using namespace Fortran::runtime;
 using namespace Fortran::runtime::cuda;
@@ -25,38 +25,9 @@ static OwningPtr<Descriptor> createAllocatable(
       CFI_attribute_allocatable);
 }
 
-thread_local static int32_t defaultDevice = 0;
-
-CUdevice getDefaultCuDevice() {
-  CUdevice device;
-  CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
-  return device;
-}
-
-class ScopedContext {
-public:
-  ScopedContext() {
-    // Static reference to CUDA primary context for device ordinal
-    // defaultDevice.
-    static CUcontext context = [] {
-      CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
-      CUcontext ctx;
-      // Note: this does not affect the current context.
-      CUDA_REPORT_IF_ERROR(
-          cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
-      return ctx;
-    }();
-
-    CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
-  }
-
-  ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
-};
-
 TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // REAL(4), DEVICE, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Real, 4)};
   a->SetAllocIdx(kDeviceAllocatorPos);
@@ -74,7 +45,6 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
 TEST(AllocatableCUFTest, SimplePinnedAllocate) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Integer, 4)};
   EXPECT_FALSE(a->HasAddendum());
@@ -93,7 +63,6 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) {
 TEST(AllocatableCUFTest, DescriptorAllocationTest) {
   using Fortran::common::TypeCategory;
   RTNAME(CUFRegisterAllocator)();
-  ScopedContext ctx;
   // REAL(4), DEVICE, ALLOCATABLE :: a(:)
   auto a{createAllocatable(TypeCategory::Real, 4)};
   Descriptor *desc = nullptr;

From f1779ae53b5a8f65406648f1b69e3dd1ae0340b0 Mon Sep 17 00:00:00 2001
From: Keith Smiley <keithbsmiley@gmail.com>
Date: Wed, 14 Aug 2024 14:58:43 -0700
Subject: [PATCH 06/47] [bazel] Port 4bac8fd8904904bc7d502f39851eef50b5afff73
 (#104278)

---
 .../llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
index b9e0a2e153ac1e..13f6f815d39950 100644
--- a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel
@@ -33,5 +33,8 @@ expand_template(
             "//llvm:not",
         ] + glob(["Inputs/**"]),
     )
-    for src in glob(["*/*.py"])
+    for src in glob(
+        ["*/*.py"],
+        exclude = ["Inputs/**"],
+    )
 ]

From 48809fafbc083a2e4c03f70406b712ff18b42554 Mon Sep 17 00:00:00 2001
From: Joshua Batista <jbatista@microsoft.com>
Date: Wed, 14 Aug 2024 15:11:34 -0700
Subject: [PATCH 07/47] Remove unused variable, and unneeded extract element
 instruction (#103489)

This PR removes an unneeded extract element instruction from codegen,
along with the variable that captured that instruction's return value.
---
 clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl | 2 +-
 llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 1 -
 llvm/test/CodeGen/DirectX/normalize.ll             | 6 ------
 3 files changed, 1 insertion(+), 8 deletions(-)

diff --git a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
index c72c8b3c222b6b..fc48c9b2589f7e 100644
--- a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
+++ b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected
+// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected
 
 void test_too_few_arg()
 {
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
index 626321f44c2bfc..e63633b8a1e1ab 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp
@@ -252,7 +252,6 @@ static bool expandNormalizeIntrinsic(CallInst *Orig) {
     return true;
   }
 
-  Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0);
   unsigned XVecSize = XVec->getNumElements();
   Value *DotProduct = nullptr;
   // use the dot intrinsic corresponding to the vector size
diff --git a/llvm/test/CodeGen/DirectX/normalize.ll b/llvm/test/CodeGen/DirectX/normalize.ll
index f3533cc56e7c25..e2c8a5d4656a65 100644
--- a/llvm/test/CodeGen/DirectX/normalize.ll
+++ b/llvm/test/CodeGen/DirectX/normalize.ll
@@ -22,7 +22,6 @@ entry:
 
 define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <2 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}})
   ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]])
@@ -37,7 +36,6 @@ entry:
 
 define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <3 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}})
   ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]])
@@ -52,7 +50,6 @@ entry:
 
 define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) {
 entry:
-  ; CHECK: extractelement <4 x half> %{{.*}}, i64 0
   ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}})
   ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]])
@@ -74,7 +71,6 @@ entry:
 
 define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <2 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}})
   ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]])
@@ -89,7 +85,6 @@ entry:
 
 define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <3 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}})
   ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]])
@@ -104,7 +99,6 @@ entry:
 
 define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) {
 entry:
-  ; CHECK: extractelement <4 x float> %{{.*}}, i64 0
   ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}})
   ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]])

From 4f7ce107de0c3ae0fb5748f98bc696b6eec7aad9 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 14 Aug 2024 14:51:05 -0700
Subject: [PATCH 08/47] [RISCV] Don't combine (sext_inreg (fmv_x_anyexth X),
 i16) with Zhinx.

With Zfh and Zfhmin this combine creates a fmv_x_signexth node so we can
remember that the result is sign extended. This becomes a fmv.x.h
instruction which sign extends its result.

With Zhinx, fmv_x_signexth becomes a COPY_TO_REGCLASS. In order for
this to guarantee the result is properly sign extended we need all
producers of a GPRF16 register class to guarantee the rest of the
GPR is sign extended. I don't think we've done that. bitcasts from i16
to f16 definitely don't do it.

The safest thing to do is to not do this combine so the sign_extend_inreg
will emit a shift pair. This is also consistent with the code generated
for Zfinx on RV64, we don't assume the upper 32 bits are sign extended.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp        | 4 +++-
 llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td         | 1 -
 llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll    | 2 ++
 llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll | 2 ++
 4 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b14deb479bf6f..02f48d41b56b3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13843,8 +13843,10 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
   EVT VT = N->getValueType(0);
 
   // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
+  // Don't do this with Zhinx. We need to explicitly sign extend the GPR.
   if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
-      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
+      cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16) &&
+      Subtarget.hasStdExtZfhmin())
     return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
                        Src.getOperand(0));
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 85715ca9145c35..abdd366741eb04 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -458,7 +458,6 @@ def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>;
 // Moves (no conversion)
 def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
 def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
-def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
 
 def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>;
 } // Predicates = [HasStdExtZhinxmin]
diff --git a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
index 08dcefa0464030..9aec4dea63b9d2 100644
--- a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll
@@ -123,6 +123,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind {
 ; RV64IZHINX-LABEL: bcvt_f16_to_sext_i16:
 ; RV64IZHINX:       # %bb.0:
 ; RV64IZHINX-NEXT:    fadd.h a0, a0, a1
+; RV64IZHINX-NEXT:    slli a0, a0, 48
+; RV64IZHINX-NEXT:    srai a0, a0, 48
 ; RV64IZHINX-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = bitcast half %1 to i16
diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
index f867fe46f0ec33..aac1a65e6c4fec 100644
--- a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll
@@ -144,6 +144,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind {
 ; RV64IZHINXMIN-NEXT:    fcvt.s.h a0, a0
 ; RV64IZHINXMIN-NEXT:    fadd.s a0, a0, a1
 ; RV64IZHINXMIN-NEXT:    fcvt.h.s a0, a0
+; RV64IZHINXMIN-NEXT:    slli a0, a0, 48
+; RV64IZHINXMIN-NEXT:    srai a0, a0, 48
 ; RV64IZHINXMIN-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = bitcast half %1 to i16

From 539bf499615dbbfe98deaac1021f351eaad330ea Mon Sep 17 00:00:00 2001
From: Kirill Stoimenov <kstoimenov@google.com>
Date: Wed, 14 Aug 2024 22:29:07 +0000
Subject: [PATCH 09/47] [Sanitizers] Disable prctl test on Android.

---
 compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
index f33aa2004db79e..d0be7f4fa87899 100644
--- a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
+++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp
@@ -1,5 +1,7 @@
 // RUN: %clangxx %s -o %t && %run %t %p
 
+// UNSUPPORTED: android
+
 #include <assert.h>
 #include <errno.h>
 #include <stdint.h>

From 2b959bd7f21bc7550a99fb160997002b7e4f1b62 Mon Sep 17 00:00:00 2001
From: cor3ntin <corentinjabot@gmail.com>
Date: Thu, 15 Aug 2024 00:55:54 +0200
Subject: [PATCH 10/47] [Clang] Error on extraneous template headers by
 default. (#104046)

As discussed here

https://github.com/llvm/llvm-project/issues/99296#issuecomment-2240807413

Fixes #99296
Fixes #50294
---
 clang/docs/ReleaseNotes.rst                      | 9 +++++++++
 clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 ++-
 clang/test/Misc/warning-flags.c                  | 3 +--
 clang/test/SemaTemplate/temp_explicit.cpp        | 7 ++++++-
 4 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1541b0cbf4875c..7c4451d93394c3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -65,6 +65,15 @@ C++ Specific Potentially Breaking Changes
   `-Wno-enum-constexpr-conversion`, to allow for a transition period for users.
   Now, in Clang 20, **it is no longer possible to suppress the diagnostic**.
 
+- Extraneous template headers are now ill-formed by default.
+  This error can be disabled with ``-Wno-error=extraneous-template-head``.
+
+  .. code-block:: c++
+
+    template <> // error: extraneous template head
+    template <typename T>
+    void f();
+    
 ABI Changes in This Version
 ---------------------------
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c710c3360be7da..da2f939067bfab 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -5428,7 +5428,8 @@ def err_template_spec_extra_headers : Error<
   "extraneous template parameter list in template specialization or "
   "out-of-line template definition">;
 def ext_template_spec_extra_headers : ExtWarn<
-  "extraneous template parameter list in template specialization">;
+  "extraneous template parameter list in template specialization">,
+   InGroup<DiagGroup<"extraneous-template-head">>, DefaultError;
 def note_explicit_template_spec_does_not_need_header : Note<
   "'template<>' header not required for explicitly-specialized class %0 "
   "declared here">;
diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c
index cdbe1e95cba965..35543e6a49ffda 100644
--- a/clang/test/Misc/warning-flags.c
+++ b/clang/test/Misc/warning-flags.c
@@ -18,14 +18,13 @@ This test serves two purposes:
 
 The list of warnings below should NEVER grow.  It should gradually shrink to 0.
 
-CHECK: Warnings without flags (65):
+CHECK: Warnings without flags (64):
 
 CHECK-NEXT:   ext_expected_semi_decl_list
 CHECK-NEXT:   ext_missing_whitespace_after_macro_name
 CHECK-NEXT:   ext_new_paren_array_nonconst
 CHECK-NEXT:   ext_plain_complex
 CHECK-NEXT:   ext_template_arg_extra_parens
-CHECK-NEXT:   ext_template_spec_extra_headers
 CHECK-NEXT:   ext_typecheck_cond_incompatible_operands
 CHECK-NEXT:   ext_typecheck_ordered_comparison_of_pointer_integer
 CHECK-NEXT:   ext_using_undefined_std
diff --git a/clang/test/SemaTemplate/temp_explicit.cpp b/clang/test/SemaTemplate/temp_explicit.cpp
index 0bb0cfad61fdb0..4612e4a57e90e0 100644
--- a/clang/test/SemaTemplate/temp_explicit.cpp
+++ b/clang/test/SemaTemplate/temp_explicit.cpp
@@ -1,6 +1,7 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat %s
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat -std=c++98 %s
 // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++20 %s
 //
 // Tests explicit instantiation of templates.
 template<typename T, typename U = T> class X0 { };
@@ -128,11 +129,15 @@ struct Foo<int> // expected-note{{header not required for explicitly-specialized
     {};
 };
 
-template <> // expected-warning{{extraneous template parameter list}}
+template <> // expected-error{{extraneous template parameter list}}
 template <>
 struct Foo<int>::Bar<void>
 {};
 
+#if __cplusplus >= 202002L
+template<> void f(auto); // expected-error{{extraneous template parameter list}}
+#endif
+
 namespace N1 {
 
   template<typename T> struct X7 { }; // expected-note{{here}}

From 9a666deecb9ff6ca3a6b12e6c2877e19b74b54da Mon Sep 17 00:00:00 2001
From: Justin Stitt <justinstitt@google.com>
Date: Wed, 14 Aug 2024 17:17:06 -0700
Subject: [PATCH 11/47] [Clang] Overflow Pattern Exclusions (#100272)

Introduce "-fsanitize-overflow-pattern-exclusion=" which can be used to
disable sanitizer instrumentation for common overflow-dependent code
patterns.

For a wide selection of projects, proper overflow sanitization could
help catch bugs and solve security vulnerabilities. Unfortunately, in
some cases the integer overflow sanitizers are too noisy for their users
and are often left disabled. Providing users with a method to disable
sanitizer instrumentation of common patterns could mean more projects
actually utilize the sanitizers in the first place.

One such project that has opted to not use integer overflow (or
truncation) sanitizers is the Linux Kernel. There has been some
discussion[1] recently concerning mitigation strategies for unexpected
arithmetic overflow. This discussion is still ongoing and a succinct
article[2] accurately sums up the discussion. In summary, many Kernel
developers do not want to introduce more arithmetic wrappers when
most developers understand the code patterns as they are.

Patterns like:

    if (base + offset < base) { ... }

or

    while (i--) { ... }

or

    #define SOME -1UL

are extremely common in a code base like the Linux Kernel. It is
perhaps too much to ask of kernel developers to use arithmetic wrappers
in these cases. For example:

    while (wrapping_post_dec(i)) { ... }

which wraps some builtin would not fly. This would incur too many
changes to existing code; the code churn would be too much, at least too
much to justify turning on overflow sanitizers.

Currently, this commit tackles three pervasive idioms:

1. "if (a + b < a)" or some logically-equivalent re-ordering like "if (a > b + a)"
2. "while (i--)" (for unsigned) a post-decrement always overflows here
3. "-1UL, -2UL, etc" negation of unsigned constants will always overflow

The patterns that are excluded can be chosen from the following list:

- add-overflow-test
- post-decr-while
- negated-unsigned-const

These can be enabled with a comma-separated list:

    -fsanitize-overflow-pattern-exclusion=add-overflow-test,negated-unsigned-const

"all" or "none" may also be used to specify that all patterns should be
excluded or that none should be.

[1] https://lore.kernel.org/all/202404291502.612E0A10@keescook/
[2] https://lwn.net/Articles/979747/

CCs: @efriedma-quic @kees @jyknight @fmayer @vitalybuka
Signed-off-by: Justin Stitt <justinstitt@google.com>
Co-authored-by: Bill Wendling <morbo@google.com>
---
 clang/docs/ReleaseNotes.rst                   |  30 ++++
 clang/docs/UndefinedBehaviorSanitizer.rst     |  42 +++++
 clang/include/clang/AST/Expr.h                |   9 ++
 clang/include/clang/AST/Stmt.h                |   5 +
 clang/include/clang/Basic/LangOptions.def     |   2 +
 clang/include/clang/Basic/LangOptions.h       |  28 ++++
 clang/include/clang/Driver/Options.td         |   5 +
 clang/include/clang/Driver/SanitizerArgs.h    |   1 +
 clang/lib/AST/Expr.cpp                        |  54 +++++++
 clang/lib/CodeGen/CGExprScalar.cpp            |  41 ++++-
 clang/lib/Driver/SanitizerArgs.cpp            |  37 +++++
 clang/lib/Driver/ToolChains/Clang.cpp         |   3 +
 clang/lib/Frontend/CompilerInvocation.cpp     |  13 ++
 clang/lib/Serialization/ASTReaderStmt.cpp     |   1 +
 clang/lib/Serialization/ASTWriterStmt.cpp     |   1 +
 .../CodeGen/overflow-idiom-exclusion-fp.c     |  83 ++++++++++
 clang/test/CodeGen/overflow-idiom-exclusion.c | 151 ++++++++++++++++++
 17 files changed, 504 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/overflow-idiom-exclusion-fp.c
 create mode 100644 clang/test/CodeGen/overflow-idiom-exclusion.c

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7c4451d93394c3..f5696d6ce15dc7 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -392,6 +392,36 @@ Moved checkers
 Sanitizers
 ----------
 
+- Added the ``-fsanitize-overflow-pattern-exclusion=`` flag which can be used
+  to disable sanitizer instrumentation for specific overflow-dependent code
+  patterns. The supported patterns are: ``add-overflow-test``,
+  ``negated-unsigned-const``, and ``post-decr-while``. Instrumentation can be
+  toggled off for all available patterns by specifying ``all``. Conversely,
+  you can disable all exclusions with ``none``.
+
+  .. code-block:: c++
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=add-overflow-test``
+     int common_overflow_check_pattern(unsigned base, unsigned offset) {
+       if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, won't be instrumented
+     }
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=negated-unsigned-const``
+     void negation_overflow() {
+       unsigned long foo = -1UL; // No longer causes a negation overflow warning
+       unsigned long bar = -2UL; // and so on...
+     }
+
+     /// specified with ``-fsanitize-overflow-pattern-exclusion=post-decr-while``
+     void while_post_decrement() {
+       unsigned char count = 16;
+       while (count--) { /* ... */} // No longer causes unsigned-integer-overflow sanitizer to trip
+     }
+
+  Many existing projects contain a large number of these code patterns.
+  This new flag should allow those projects to enable integer sanitizers with
+  less noise.
+
 Python Binding Changes
 ----------------------
 - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``.
diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst
index 531d56e313826c..9f3d980eefbea7 100644
--- a/clang/docs/UndefinedBehaviorSanitizer.rst
+++ b/clang/docs/UndefinedBehaviorSanitizer.rst
@@ -293,6 +293,48 @@ To silence reports from unsigned integer overflow, you can set
 ``-fsanitize-recover=unsigned-integer-overflow``, is particularly useful for
 providing fuzzing signal without blowing up logs.
 
+Disabling instrumentation for common overflow patterns
+------------------------------------------------------
+
+There are certain overflow-dependent or overflow-prone code patterns which
+produce a lot of noise for integer overflow/truncation sanitizers. Negated
+unsigned constants, post-decrements in a while loop condition and simple
+overflow checks are accepted and pervasive code patterns. However, the signal
+received from sanitizers instrumenting these code patterns may be too noisy for
+some projects. To disable instrumentation for these common patterns one should
+use ``-fsanitize-overflow-pattern-exclusion=``.
+
+Currently, this option supports three overflow-dependent code idioms:
+
+``negated-unsigned-const``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=negated-unsigned-const
+    unsigned long foo = -1UL; // No longer causes a negation overflow warning
+    unsigned long bar = -2UL; // and so on...
+
+``post-decr-while``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=post-decr-while
+    unsigned char count = 16;
+    while (count--) { /* ... */ } // No longer causes unsigned-integer-overflow sanitizer to trip
+
+``add-overflow-test``
+
+.. code-block:: c++
+
+    /// -fsanitize-overflow-pattern-exclusion=add-overflow-test
+    if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings,
+                                            // won't be instrumented (same for signed types)
+
+You can enable all exclusions with
+``-fsanitize-overflow-pattern-exclusion=all`` or disable all exclusions with
+``-fsanitize-overflow-pattern-exclusion=none``. Specifying ``none`` has
+precedence over other values.
+
 Issue Suppression
 =================
 
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 5b813bfc2faf90..f5863524723a2e 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -4043,6 +4043,15 @@ class BinaryOperator : public Expr {
   void setHasStoredFPFeatures(bool B) { BinaryOperatorBits.HasFPFeatures = B; }
   bool hasStoredFPFeatures() const { return BinaryOperatorBits.HasFPFeatures; }
 
+  /// Set and get the bit that informs arithmetic overflow sanitizers whether
+  /// or not they should exclude certain BinaryOperators from instrumentation
+  void setExcludedOverflowPattern(bool B) {
+    BinaryOperatorBits.ExcludedOverflowPattern = B;
+  }
+  bool hasExcludedOverflowPattern() const {
+    return BinaryOperatorBits.ExcludedOverflowPattern;
+  }
+
   /// Get FPFeatures from trailing storage
   FPOptionsOverride getStoredFPFeatures() const {
     assert(hasStoredFPFeatures());
diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h
index bbd7634bcc3bfb..f1a2aac0a8b2f8 100644
--- a/clang/include/clang/AST/Stmt.h
+++ b/clang/include/clang/AST/Stmt.h
@@ -650,6 +650,11 @@ class alignas(void *) Stmt {
     LLVM_PREFERRED_TYPE(bool)
     unsigned HasFPFeatures : 1;
 
+    /// Whether or not this BinaryOperator should be excluded from integer
+    /// overflow sanitization.
+    LLVM_PREFERRED_TYPE(bool)
+    unsigned ExcludedOverflowPattern : 1;
+
     SourceLocation OpLoc;
   };
 
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
index d454a7ff2f8cf4..2e9f2c552aad8a 100644
--- a/clang/include/clang/Basic/LangOptions.def
+++ b/clang/include/clang/Basic/LangOptions.def
@@ -406,6 +406,8 @@ VALUE_LANGOPT(TrivialAutoVarInitMaxSize, 32, 0,
              "stop trivial automatic variable initialization if var size exceeds the specified size (in bytes). Must be greater than 0.")
 ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 2, SOB_Undefined,
              "signed integer overflow handling")
+LANGOPT(IgnoreNegationOverflow, 1, 0, "ignore overflow caused by negation")
+LANGOPT(SanitizeOverflowIdioms, 1, 1, "enable instrumentation for common overflow idioms")
 ENUM_LANGOPT(ThreadModel  , ThreadModelKind, 2, ThreadModelKind::POSIX, "Thread Model")
 
 BENIGN_LANGOPT(ArrowDepth, 32, 256,
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
index 91f1c2f2e6239e..eb4cb4b5a7e93f 100644
--- a/clang/include/clang/Basic/LangOptions.h
+++ b/clang/include/clang/Basic/LangOptions.h
@@ -367,6 +367,21 @@ class LangOptionsBase {
     PerThread,
   };
 
+  /// Exclude certain code patterns from being instrumented by arithmetic
+  /// overflow sanitizers
+  enum OverflowPatternExclusionKind {
+    /// Don't exclude any overflow patterns from sanitizers
+    None = 1 << 0,
+    /// Exclude all overflow patterns (below)
+    All = 1 << 1,
+    /// if (a + b < a)
+    AddOverflowTest = 1 << 2,
+    /// -1UL
+    NegUnsignedConst = 1 << 3,
+    /// while (count--)
+    PostDecrInWhile = 1 << 4,
+  };
+
   enum class DefaultVisiblityExportMapping {
     None,
     /// map only explicit default visibilities to exported
@@ -555,6 +570,11 @@ class LangOptions : public LangOptionsBase {
   /// The default stream kind used for HIP kernel launching.
   GPUDefaultStreamKind GPUDefaultStream;
 
+  /// Which overflow patterns should be excluded from sanitizer instrumentation
+  unsigned OverflowPatternExclusionMask = 0;
+
+  std::vector<std::string> OverflowPatternExclusionValues;
+
   /// The seed used by the randomize structure layout feature.
   std::string RandstructSeed;
 
@@ -630,6 +650,14 @@ class LangOptions : public LangOptionsBase {
     return MSCompatibilityVersion >= MajorVersion * 100000U;
   }
 
+  bool isOverflowPatternExcluded(OverflowPatternExclusionKind Kind) const {
+    if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::None)
+      return false;
+    if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::All)
+      return true;
+    return OverflowPatternExclusionMask & Kind;
+  }
+
   /// Reset all of the options that are not considered when building a
   /// module.
   void resetNonModularOptions();
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 6df3a6a5943a97..acc1f2fde53979 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2565,6 +2565,11 @@ defm sanitize_stats : BoolOption<"f", "sanitize-stats",
           "Disable">,
   BothFlags<[], [ClangOption], " sanitizer statistics gathering.">>,
   Group<f_clang_Group>;
+def fsanitize_overflow_pattern_exclusion_EQ : CommaJoined<["-"], "fsanitize-overflow-pattern-exclusion=">,
+  HelpText<"Specify the overflow patterns to exclude from arithmetic sanitizer instrumentation">,
+  Visibility<[ClangOption, CC1Option]>,
+  Values<"none,all,add-overflow-test,negated-unsigned-const,post-decr-while">,
+  MarshallingInfoStringVector<LangOpts<"OverflowPatternExclusionValues">>;
 def fsanitize_thread_memory_access : Flag<["-"], "fsanitize-thread-memory-access">,
                                      Group<f_clang_Group>,
                                      HelpText<"Enable memory access instrumentation in ThreadSanitizer (default)">;
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
index 47ef175302679f..e64ec463ca8907 100644
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -33,6 +33,7 @@ class SanitizerArgs {
   std::vector<std::string> BinaryMetadataIgnorelistFiles;
   int CoverageFeatures = 0;
   int BinaryMetadataFeatures = 0;
+  int OverflowPatternExclusions = 0;
   int MsanTrackOrigins = 0;
   bool MsanUseAfterDtor = true;
   bool MsanParamRetval = true;
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp
index 9d5b8167d0ee62..57475c66a94e35 100644
--- a/clang/lib/AST/Expr.cpp
+++ b/clang/lib/AST/Expr.cpp
@@ -4759,6 +4759,53 @@ ParenListExpr *ParenListExpr::CreateEmpty(const ASTContext &Ctx,
   return new (Mem) ParenListExpr(EmptyShell(), NumExprs);
 }
 
+/// Certain overflow-dependent code patterns can have their integer overflow
+/// sanitization disabled. Check for the common pattern `if (a + b < a)` and
+/// return the resulting BinaryOperator responsible for the addition so we can
+/// elide overflow checks during codegen.
+static std::optional<BinaryOperator *>
+getOverflowPatternBinOp(const BinaryOperator *E) {
+  Expr *Addition, *ComparedTo;
+  if (E->getOpcode() == BO_LT) {
+    Addition = E->getLHS();
+    ComparedTo = E->getRHS();
+  } else if (E->getOpcode() == BO_GT) {
+    Addition = E->getRHS();
+    ComparedTo = E->getLHS();
+  } else {
+    return {};
+  }
+
+  const Expr *AddLHS = nullptr, *AddRHS = nullptr;
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(Addition);
+
+  if (BO && BO->getOpcode() == clang::BO_Add) {
+    // now store addends for lookup on other side of '>'
+    AddLHS = BO->getLHS();
+    AddRHS = BO->getRHS();
+  }
+
+  if (!AddLHS || !AddRHS)
+    return {};
+
+  const Decl *LHSDecl, *RHSDecl, *OtherDecl;
+
+  LHSDecl = AddLHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+  RHSDecl = AddRHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+  OtherDecl = ComparedTo->IgnoreParenImpCasts()->getReferencedDeclOfCallee();
+
+  if (!OtherDecl)
+    return {};
+
+  if (!LHSDecl && !RHSDecl)
+    return {};
+
+  if ((LHSDecl && LHSDecl == OtherDecl && LHSDecl != RHSDecl) ||
+      (RHSDecl && RHSDecl == OtherDecl && RHSDecl != LHSDecl))
+    return BO;
+  return {};
+}
+
 BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs,
                                Opcode opc, QualType ResTy, ExprValueKind VK,
                                ExprObjectKind OK, SourceLocation opLoc,
@@ -4768,8 +4815,15 @@ BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs,
   assert(!isCompoundAssignmentOp() &&
          "Use CompoundAssignOperator for compound assignments");
   BinaryOperatorBits.OpLoc = opLoc;
+  BinaryOperatorBits.ExcludedOverflowPattern = 0;
   SubExprs[LHS] = lhs;
   SubExprs[RHS] = rhs;
+  if (Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::AddOverflowTest)) {
+    std::optional<BinaryOperator *> Result = getOverflowPatternBinOp(this);
+    if (Result.has_value())
+      Result.value()->BinaryOperatorBits.ExcludedOverflowPattern = 1;
+  }
   BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage();
   if (hasStoredFPFeatures())
     setStoredFPFeatures(FPFeatures);
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 84392745ea6144..6eac2b4c54e1ba 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -24,6 +24,7 @@
 #include "clang/AST/Attr.h"
 #include "clang/AST/DeclObjC.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ParentMapContext.h"
 #include "clang/AST/RecordLayout.h"
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/CodeGenOptions.h"
@@ -195,13 +196,24 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) {
   if (!Op.mayHaveIntegerOverflow())
     return true;
 
+  const UnaryOperator *UO = dyn_cast<UnaryOperator>(Op.E);
+
+  if (UO && UO->getOpcode() == UO_Minus &&
+      Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::NegUnsignedConst) &&
+      UO->isIntegerConstantExpr(Ctx))
+    return true;
+
   // If a unary op has a widened operand, the op cannot overflow.
-  if (const auto *UO = dyn_cast<UnaryOperator>(Op.E))
+  if (UO)
     return !UO->canOverflow();
 
   // We usually don't need overflow checks for binops with widened operands.
   // Multiplication with promoted unsigned operands is a special case.
   const auto *BO = cast<BinaryOperator>(Op.E);
+  if (BO->hasExcludedOverflowPattern())
+    return true;
+
   auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS());
   if (!OptionalLHSTy)
     return false;
@@ -2766,6 +2778,26 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior(
   llvm_unreachable("Unknown SignedOverflowBehaviorTy");
 }
 
+/// For overflow pattern exclusion: is this a post-decrement sitting directly
+/// under a WhileStmt, as in "while (i--)"?
+static bool matchesPostDecrInWhile(const UnaryOperator *UO, bool isInc,
+                                   bool isPre, ASTContext &Ctx) {
+  if (isInc || isPre)
+    return false;
+
+  // Only applies when the post-decr-while pattern exclusion is enabled.
+  if (!Ctx.getLangOpts().isOverflowPatternExcluded(
+          LangOptions::OverflowPatternExclusionKind::PostDecrInWhile))
+    return false;
+
+  // Every parent (usually one) must be a WhileStmt; none at all also passes.
+  for (const auto &Parent : Ctx.getParentMapContext().getParents(*UO))
+    if (!Parent.get<WhileStmt>())
+      return false;
+
+  return true;
+}
+
 namespace {
 /// Handles check and update for lastprivate conditional variables.
 class OMPLastprivateConditionalUpdateRAII {
@@ -2877,6 +2909,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
   } else if (type->isIntegerType()) {
     QualType promotedType;
     bool canPerformLossyDemotionCheck = false;
+
+    bool excludeOverflowPattern =
+        matchesPostDecrInWhile(E, isInc, isPre, CGF.getContext());
+
     if (CGF.getContext().isPromotableIntegerType(type)) {
       promotedType = CGF.getContext().getPromotedIntegerType(type);
       assert(promotedType != type && "Shouldn't promote to the same type.");
@@ -2936,7 +2972,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
     } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) {
       value = EmitIncDecConsiderOverflowBehavior(E, value, isInc);
     } else if (E->canOverflow() && type->isUnsignedIntegerType() &&
-               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) {
+               CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) &&
+               !excludeOverflowPattern) {
       value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec(
           E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts())));
     } else {
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 1fd870b72286e5..a63ee944fd1bb4 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -119,6 +119,10 @@ static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
 static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
                                  bool DiagnoseErrors);
 
+static int parseOverflowPatternExclusionValues(const Driver &D,
+                                               const llvm::opt::Arg *A,
+                                               bool DiagnoseErrors);
+
 /// Parse -f(no-)?sanitize-metadata= flag values, diagnosing any invalid
 /// components. Returns OR of members of \c BinaryMetadataFeature enumeration.
 static int parseBinaryMetadataFeatures(const Driver &D, const llvm::opt::Arg *A,
@@ -788,6 +792,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC,
           << "fsanitize-trap=cfi";
   }
 
+  for (const auto *Arg :
+       Args.filtered(options::OPT_fsanitize_overflow_pattern_exclusion_EQ)) {
+    Arg->claim();
+    OverflowPatternExclusions |=
+        parseOverflowPatternExclusionValues(D, Arg, DiagnoseErrors);
+  }
+
   // Parse -f(no-)?sanitize-coverage flags if coverage is supported by the
   // enabled sanitizers.
   for (const auto *Arg : Args) {
@@ -1241,6 +1252,10 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args,
   addSpecialCaseListOpt(Args, CmdArgs,
                         "-fsanitize-system-ignorelist=", SystemIgnorelistFiles);
 
+  if (OverflowPatternExclusions)
+    Args.AddAllArgs(CmdArgs,
+                    options::OPT_fsanitize_overflow_pattern_exclusion_EQ);
+
   if (MsanTrackOrigins)
     CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" +
                                          Twine(MsanTrackOrigins)));
@@ -1426,6 +1441,28 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A,
   return Kinds;
 }
 
+/// Parse -fsanitize-overflow-pattern-exclusion= values into an OR'd mask.
+static int parseOverflowPatternExclusionValues(const Driver &D,
+                                               const llvm::opt::Arg *A,
+                                               bool DiagnoseErrors) {
+  int Exclusions = 0;
+  for (const char *Value : A->getValues()) {
+    int E =
+        llvm::StringSwitch<int>(Value)
+            .Case("none", LangOptionsBase::None)
+            .Case("all", LangOptionsBase::All)
+            .Case("add-overflow-test", LangOptionsBase::AddOverflowTest)
+            .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst)
+            .Case("post-decr-while", LangOptionsBase::PostDecrInWhile)
+            .Default(0);
+    if (E == 0 && DiagnoseErrors)
+      D.Diag(clang::diag::err_drv_unsupported_option_argument)
+          << A->getSpelling() << Value;
+    Exclusions |= E;
+  }
+  return Exclusions;
+}
+
 int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A,
                           bool DiagnoseErrors) {
   assert(A->getOption().matches(options::OPT_fsanitize_coverage) ||
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 96aa930ea28612..f2bc11839edd4d 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -7769,6 +7769,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ);
   }
 
+  Args.AddAllArgs(CmdArgs,
+                  options::OPT_fsanitize_overflow_pattern_exclusion_EQ);
+
   Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block,
                   options::OPT_fno_offload_uniform_block);
 
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index e3911c281985b7..5a5f5cb79a12f2 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -4267,6 +4267,19 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val;
   }
 
+  if (auto *A = Args.getLastArg(OPT_fsanitize_overflow_pattern_exclusion_EQ)) {
+    for (int i = 0, n = A->getNumValues(); i != n; ++i) {
+      Opts.OverflowPatternExclusionMask |=
+          llvm::StringSwitch<unsigned>(A->getValue(i))
+              .Case("none", LangOptionsBase::None)
+              .Case("all", LangOptionsBase::All)
+              .Case("add-overflow-test", LangOptionsBase::AddOverflowTest)
+              .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst)
+              .Case("post-decr-while", LangOptionsBase::PostDecrInWhile)
+              .Default(0);
+    }
+  }
+
   // Parse -fsanitize= arguments.
   parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ),
                       Diags, Opts.Sanitize);
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a33f2a41a65497..8ae07907a04aba 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -1128,6 +1128,7 @@ void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) {
       (BinaryOperator::Opcode)CurrentUnpackingBits->getNextBits(/*Width=*/6));
   bool hasFP_Features = CurrentUnpackingBits->getNextBit();
   E->setHasStoredFPFeatures(hasFP_Features);
+  E->setExcludedOverflowPattern(CurrentUnpackingBits->getNextBit());
   E->setLHS(Record.readSubExpr());
   E->setRHS(Record.readSubExpr());
   E->setOperatorLoc(readSourceLocation());
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 038616a675b727..c292d0a789c7cd 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -1063,6 +1063,7 @@ void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) {
   CurrentPackingBits.addBits(E->getOpcode(), /*Width=*/6);
   bool HasFPFeatures = E->hasStoredFPFeatures();
   CurrentPackingBits.addBit(HasFPFeatures);
+  CurrentPackingBits.addBit(E->hasExcludedOverflowPattern());
   Record.AddStmt(E->getLHS());
   Record.AddStmt(E->getRHS());
   Record.AddSourceLocation(E->getOperatorLoc());
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
new file mode 100644
index 00000000000000..d21405c56beab3
--- /dev/null
+++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
@@ -0,0 +1,83 @@
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s
+
+// Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns
+extern unsigned a, b, c;
+extern int u, v, w;
+
+extern unsigned some(void);
+
+// Make sure all these still have handler paths, we shouldn't be excluding
+// instrumentation of any "near" patterns.
+// CHECK-LABEL: close_but_not_quite
+void close_but_not_quite(void) {
+  // CHECK: br i1{{.*}}handler.
+  if (a + b > a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a - b < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b + 1 < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  // CHECK: br i1{{.*}}handler.
+  if (a + b < a + 1)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (b >= a + b)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + a < a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler.
+  if (a + b == a)
+    c = 9;
+
+  // CHECK: br i1{{.*}}handler
+  // Although this can never actually overflow we are still checking that the
+  // sanitizer instruments it.
+  while (--a)
+    some();
+}
+
+// cvise'd kernel code that caused problems during development
+typedef unsigned _size_t;
+typedef enum { FSE_repeat_none } FSE_repeat;
+typedef enum { ZSTD_defaultAllowed } ZSTD_defaultPolicy_e;
+FSE_repeat ZSTD_selectEncodingType_repeatMode;
+ZSTD_defaultPolicy_e ZSTD_selectEncodingType_isDefaultAllowed;
+_size_t ZSTD_NCountCost(void);
+
+// CHECK-LABEL: ZSTD_selectEncodingType
+// CHECK: br i1{{.*}}handler
+void ZSTD_selectEncodingType(void) {
+  _size_t basicCost =
+             ZSTD_selectEncodingType_isDefaultAllowed ? ZSTD_NCountCost() : 0,
+         compressedCost = 3 + ZSTD_NCountCost();
+  if (basicCost <= compressedCost)
+    ZSTD_selectEncodingType_repeatMode = FSE_repeat_none;
+}
+
+// CHECK-LABEL: function_calls
+void function_calls(void) {
+  // CHECK: br i1{{.*}}handler
+  if (some() + b < some())
+    c = 9;
+}
+
+// CHECK-LABEL: not_quite_a_negated_unsigned_const
+void not_quite_a_negated_unsigned_const(void) {
+  // CHECK: br i1{{.*}}handler
+  a = -b;
+}
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c
new file mode 100644
index 00000000000000..7c8c4af61029de
--- /dev/null
+++ b/clang/test/CodeGen/overflow-idiom-exclusion.c
@@ -0,0 +1,151 @@
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - | FileCheck %s --check-prefix=ADD
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - | FileCheck %s --check-prefix=NEGATE
+// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - | FileCheck %s --check-prefix=WHILE
+
+// Ensure some common overflow-dependent or overflow-prone code patterns don't
+// trigger the overflow sanitizers. In many cases, overflow warnings caused by
+// these patterns are seen as "noise" and result in users turning off
+// sanitization altogether.
+
+// A pattern like "if (a + b < a)" simply checks for overflow and usually means
+// the user is trying to handle it gracefully.
+
+// Similarly, a pattern resembling "while (i--)" is extremely common and
+// warning on its inevitable overflow can be seen as superfluous. Do note that
+// using "i" in future calculations can be tricky because it will still
+// wrap-around.
+
+// Another common pattern that, in some cases, is found to be too noisy is
+// unsigned negation, for example:
+// unsigned long A = -1UL;
+
+
+// CHECK-NOT: handle{{.*}}overflow
+
+// ADD: usub.with.overflow
+// ADD: negate_overflow
+// ADD-NOT: handler.add_overflow
+
+// NEGATE: handler.add_overflow
+// NEGATE: usub.with.overflow
+// NEGATE-NOT: negate_overflow
+
+// WHILE: handler.add_overflow
+// WHILE: negate_overflow
+// WHILE-NOT: usub.with.overflow
+extern unsigned a, b, c;
+extern unsigned some(void);
+
+void basic_commutativity(void) {
+  if (a + b < a)
+    c = 9;
+  if (a + b < b)
+    c = 9;
+  if (b + a < b)
+    c = 9;
+  if (b + a < a)
+    c = 9;
+  if (a > a + b)
+    c = 9;
+  if (a > b + a)
+    c = 9;
+  if (b > a + b)
+    c = 9;
+  if (b > b + a)
+    c = 9;
+}
+
+void arguments_and_commutativity(unsigned V1, unsigned V2) {
+  if (V1 + V2 < V1)
+    c = 9;
+  if (V1 + V2 < V2)
+    c = 9;
+  if (V2 + V1 < V2)
+    c = 9;
+  if (V2 + V1 < V1)
+    c = 9;
+  if (V1 > V1 + V2)
+    c = 9;
+  if (V1 > V2 + V1)
+    c = 9;
+  if (V2 > V1 + V2)
+    c = 9;
+  if (V2 > V2 + V1)
+    c = 9;
+}
+
+void pointers(unsigned *P1, unsigned *P2, unsigned V1) {
+  if (*P1 + *P2 < *P1)
+    c = 9;
+  if (*P1 + V1 < V1)
+    c = 9;
+  if (V1 + *P2 < *P2)
+    c = 9;
+}
+
+struct OtherStruct {
+  unsigned foo, bar;
+};
+
+struct MyStruct {
+  unsigned base, offset;
+  struct OtherStruct os;
+};
+
+extern struct MyStruct ms;
+
+void structs(void) {
+  if (ms.base + ms.offset < ms.base)
+    c = 9;
+}
+
+void nestedstructs(void) {
+  if (ms.os.foo + ms.os.bar < ms.os.foo)
+    c = 9;
+}
+
+// Normally, this would be folded into a simple call to the overflow handler
+// and a store. Excluding this pattern results in just a store.
+void constants(void) {
+  unsigned base = 4294967295;
+  unsigned offset = 1;
+  if (base + offset < base)
+    c = 9;
+}
+
+void common_while(unsigned i) {
+  // This post-decrement usually causes overflow sanitizers to trip on the very
+  // last operation.
+  while (i--) {
+    some();
+  }
+}
+
+// Normally, these assignments would trip the unsigned overflow sanitizer.
+void negation(void) {
+#define SOME -1UL
+  unsigned long A = -1UL;
+  unsigned long B = -2UL;
+  unsigned long C = -3UL;
+  unsigned long D = -SOME;
+  (void)A;(void)B;(void)C;(void)D;
+}
+
+// cvise'd kernel code that caused problems during development due to sign
+// extension
+typedef unsigned long _size_t;
+int qnbytes;
+int *key_alloc_key;
+_size_t key_alloc_quotalen;
+int *key_alloc(void) {
+  if (qnbytes + key_alloc_quotalen < qnbytes)
+    return key_alloc_key;
+  return key_alloc_key + 3;;
+}
+
+void function_call(void) {
+  if (b + some() < b)
+    c = 9;
+}

From 9a9ca9850f3c6b278e052745f51a87296d9fedd2 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak@amd.com>
Date: Wed, 14 Aug 2024 17:20:25 -0700
Subject: [PATCH 12/47] [mlir][MemRef] Add more ops to narrow type support,
 strided metadata expansion (#102228)

- Add support for memory_space_cast to strided metadata expansion and
narrow type emulation
- Add support for expand_shape to narrow type emulation (like
collapse_shape, it's a noop after linearization) and to
expand-strided-metadata (mirroring the collapse_shape pattern)
- Add support for memref.dealloc to narrow type emulation (it is a
trivial rewrite) and for memref.copy (which is unsupported when it is
used for a layout change but a trivial rewrite otherwise)
---
 .../MemRef/Transforms/EmulateNarrowType.cpp   | 93 ++++++++++++++++++-
 .../Transforms/ExpandStridedMetadata.cpp      | 87 +++++++++++++++++
 .../Dialect/MemRef/emulate-narrow-type.mlir   | 68 ++++++++++++++
 .../MemRef/expand-strided-metadata.mlir       | 38 ++++++++
 4 files changed, 283 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
index 88d56a8fbec749..a45b79194a7580 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
@@ -234,6 +234,46 @@ struct ConvertMemRefAssumeAlignment final
   }
 };
 
+//===----------------------------------------------------------------------===//
+// ConvertMemRefCopy
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefCopy final : OpConversionPattern<memref::CopyOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::CopyOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto maybeRankedSource = dyn_cast<MemRefType>(op.getSource().getType());
+    auto maybeRankedDest = dyn_cast<MemRefType>(op.getTarget().getType());
+    if (maybeRankedSource && maybeRankedDest &&
+        maybeRankedSource.getLayout() != maybeRankedDest.getLayout())
+      return rewriter.notifyMatchFailure(
+          op, llvm::formatv("memref.copy emulation with distinct layouts ({0} "
+                            "and {1}) is currently unimplemented",
+                            maybeRankedSource.getLayout(),
+                            maybeRankedDest.getLayout()));
+    rewriter.replaceOpWithNewOp<memref::CopyOp>(op, adaptor.getSource(),
+                                                adaptor.getTarget());
+    return success();
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ConvertMemRefDealloc
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefDealloc final : OpConversionPattern<memref::DeallocOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    rewriter.replaceOpWithNewOp<memref::DeallocOp>(op, adaptor.getMemref());
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // ConvertMemRefLoad
 //===----------------------------------------------------------------------===//
@@ -300,6 +340,30 @@ struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> {
   }
 };
 
+//===----------------------------------------------------------------------===//
+// ConvertMemRefMemorySpaceCast
+//===----------------------------------------------------------------------===//
+
+struct ConvertMemRefMemorySpaceCast final
+    : OpConversionPattern<memref::MemorySpaceCastOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::MemorySpaceCastOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    Type newTy = getTypeConverter()->convertType(op.getDest().getType());
+    if (!newTy) {
+      return rewriter.notifyMatchFailure(
+          op->getLoc(), llvm::formatv("failed to convert memref type: {0}",
+                                      op.getDest().getType()));
+    }
+
+    rewriter.replaceOpWithNewOp<memref::MemorySpaceCastOp>(op, newTy,
+                                                           adaptor.getSource());
+    return success();
+  }
+};
+
 //===----------------------------------------------------------------------===//
 // ConvertMemRefReinterpretCast
 //===----------------------------------------------------------------------===//
@@ -490,6 +554,28 @@ struct ConvertMemRefCollapseShape final
   }
 };
 
+/// A `memref.expand_shape` becomes a no-op under narrow-type emulation: the
+/// emulation flattens memrefs to a single dimension, which would undo the
+/// expansion anyway, so the op is replaced by its (already converted) source.
+struct ConvertMemRefExpandShape final
+    : OpConversionPattern<memref::ExpandShapeOp> {
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ExpandShapeOp expandShapeOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    Value srcVal = adaptor.getSrc();
+    auto newTy = dyn_cast<MemRefType>(srcVal.getType());
+    if (!newTy)
+      return failure();
+
+    if (newTy.getRank() != 1)
+      return failure();
+
+    rewriter.replaceOp(expandShapeOp, srcVal);
+    return success();
+  }
+};
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
@@ -502,9 +588,10 @@ void memref::populateMemRefNarrowTypeEmulationPatterns(
 
   // Populate `memref.*` conversion patterns.
   patterns.add<ConvertMemRefAllocation<memref::AllocOp>,
-               ConvertMemRefAllocation<memref::AllocaOp>,
-               ConvertMemRefCollapseShape, ConvertMemRefLoad,
-               ConvertMemrefStore, ConvertMemRefAssumeAlignment,
+               ConvertMemRefAllocation<memref::AllocaOp>, ConvertMemRefCopy,
+               ConvertMemRefDealloc, ConvertMemRefCollapseShape,
+               ConvertMemRefExpandShape, ConvertMemRefLoad, ConvertMemrefStore,
+               ConvertMemRefAssumeAlignment, ConvertMemRefMemorySpaceCast,
                ConvertMemRefSubview, ConvertMemRefReinterpretCast>(
       typeConverter, patterns.getContext());
   memref::populateResolveExtractStridedMetadataPatterns(patterns);
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
index 585c5b73814219..a2049ba4a4924d 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp
@@ -726,6 +726,41 @@ struct ExtractStridedMetadataOpCollapseShapeFolder
   }
 };
 
+/// Pattern to replace `extract_strided_metadata(expand_shape)`
+/// with the results of computing the sizes and strides on the expanded shape
+/// and dividing up dimensions into static and dynamic parts as needed.
+struct ExtractStridedMetadataOpExpandShapeFolder
+    : OpRewritePattern<memref::ExtractStridedMetadataOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(memref::ExtractStridedMetadataOp op,
+                                PatternRewriter &rewriter) const override {
+    auto expandShapeOp = op.getSource().getDefiningOp<memref::ExpandShapeOp>();
+    if (!expandShapeOp)
+      return failure();
+
+    FailureOr<StridedMetadata> stridedMetadata =
+        resolveReshapeStridedMetadata<memref::ExpandShapeOp>(
+            rewriter, expandShapeOp, getExpandedSizes, getExpandedStrides);
+    if (failed(stridedMetadata)) {
+      return rewriter.notifyMatchFailure(
+          op, "failed to resolve metadata in terms of source expand_shape op");
+    }
+
+    Location loc = expandShapeOp.getLoc();
+    SmallVector<Value> results;
+    results.push_back(stridedMetadata->basePtr);
+    results.push_back(getValueOrCreateConstantIndexOp(rewriter, loc,
+                                                      stridedMetadata->offset));
+    results.append(
+        getValueOrCreateConstantIndexOp(rewriter, loc, stridedMetadata->sizes));
+    results.append(getValueOrCreateConstantIndexOp(rewriter, loc,
+                                                   stridedMetadata->strides));
+    rewriter.replaceOp(op, results);
+    return success();
+  }
+};
+
 /// Replace `base, offset, sizes, strides =
 ///              extract_strided_metadata(allocLikeOp)`
 ///
@@ -1060,6 +1095,54 @@ class ExtractStridedMetadataOpCastFolder
   }
 };
 
+/// Replace `base, offset, sizes, strides = extract_strided_metadata(
+///      memory_space_cast(src) to destTy)`
+/// with
+/// ```
+///    oldBase, offset, sizes, strides = extract_strided_metadata(src)
+///    destBaseTy = type(oldBase) with memory space from destTy
+///    base = memory_space_cast(oldBase) to destBaseTy
+/// ```
+///
+/// In other words, propagate metadata extraction across memory space casts.
+class ExtractStridedMetadataOpMemorySpaceCastFolder
+    : public OpRewritePattern<memref::ExtractStridedMetadataOp> {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult
+  matchAndRewrite(memref::ExtractStridedMetadataOp extractStridedMetadataOp,
+                  PatternRewriter &rewriter) const override {
+    Location loc = extractStridedMetadataOp.getLoc();
+    Value source = extractStridedMetadataOp.getSource();
+    auto memSpaceCastOp = source.getDefiningOp<memref::MemorySpaceCastOp>();
+    if (!memSpaceCastOp)
+      return failure();
+    auto newExtractStridedMetadata =
+        rewriter.create<memref::ExtractStridedMetadataOp>(
+            loc, memSpaceCastOp.getSource());
+    SmallVector<Value> results(newExtractStridedMetadata.getResults());
+    // As with most other strided metadata rewrite patterns, don't introduce
+    // a use of the base pointer where none existed. This needs to happen here,
+    // as opposed to in later dead-code elimination, because these patterns are
+    // sometimes used during dialect conversion (see EmulateNarrowType, for
+    // example), so adding spurious usages would cause a pre-legalization value
+    // to be live that would be dead had this pattern not run.
+    if (!extractStridedMetadataOp.getBaseBuffer().use_empty()) {
+      auto baseBuffer = results[0];
+      auto baseBufferType = cast<MemRefType>(baseBuffer.getType());
+      MemRefType::Builder newTypeBuilder(baseBufferType);
+      newTypeBuilder.setMemorySpace(
+          memSpaceCastOp.getResult().getType().getMemorySpace());
+      results[0] = rewriter.create<memref::MemorySpaceCastOp>(
+          loc, Type{newTypeBuilder}, baseBuffer);
+    } else {
+      results[0] = nullptr;
+    }
+    rewriter.replaceOp(extractStridedMetadataOp, results);
+    return success();
+  }
+};
+
 /// Replace `base, offset =
 ///            extract_strided_metadata(extract_strided_metadata(src)#0)`
 /// With
@@ -1099,11 +1182,13 @@ void memref::populateExpandStridedMetadataPatterns(
                ExtractStridedMetadataOpAllocFolder<memref::AllocOp>,
                ExtractStridedMetadataOpAllocFolder<memref::AllocaOp>,
                ExtractStridedMetadataOpCollapseShapeFolder,
+               ExtractStridedMetadataOpExpandShapeFolder,
                ExtractStridedMetadataOpGetGlobalFolder,
                RewriteExtractAlignedPointerAsIndexOfViewLikeOp,
                ExtractStridedMetadataOpReinterpretCastFolder,
                ExtractStridedMetadataOpSubviewFolder,
                ExtractStridedMetadataOpCastFolder,
+               ExtractStridedMetadataOpMemorySpaceCastFolder,
                ExtractStridedMetadataOpExtractStridedMetadataFolder>(
       patterns.getContext());
 }
@@ -1113,11 +1198,13 @@ void memref::populateResolveExtractStridedMetadataPatterns(
   patterns.add<ExtractStridedMetadataOpAllocFolder<memref::AllocOp>,
                ExtractStridedMetadataOpAllocFolder<memref::AllocaOp>,
                ExtractStridedMetadataOpCollapseShapeFolder,
+               ExtractStridedMetadataOpExpandShapeFolder,
                ExtractStridedMetadataOpGetGlobalFolder,
                ExtractStridedMetadataOpSubviewFolder,
                RewriteExtractAlignedPointerAsIndexOfViewLikeOp,
                ExtractStridedMetadataOpReinterpretCastFolder,
                ExtractStridedMetadataOpCastFolder,
+               ExtractStridedMetadataOpMemorySpaceCastFolder,
                ExtractStridedMetadataOpExtractStridedMetadataFolder>(
       patterns.getContext());
 }
diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
index a67237b5e4dd19..540da239fced08 100644
--- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
@@ -6,11 +6,13 @@ func.func @memref_i8() -> i8 {
     %c3 = arith.constant 3 : index
     %m = memref.alloc() : memref<4xi8, 1>
     %v = memref.load %m[%c3] : memref<4xi8, 1>
+    memref.dealloc %m : memref<4xi8, 1>
     return %v : i8
 }
 // CHECK-LABEL: func @memref_i8()
 //       CHECK:   %[[M:.+]] = memref.alloc() : memref<4xi8, 1>
 //  CHECK-NEXT:   %[[V:.+]] = memref.load %[[M]][%{{.+}}] : memref<4xi8, 1>
+//  CHECK-NEXT:   memref.dealloc %[[M]]
 //  CHECK-NEXT:   return %[[V]]
 
 // CHECK32-LABEL: func @memref_i8()
@@ -21,6 +23,7 @@ func.func @memref_i8() -> i8 {
 //       CHECK32:   %[[CAST:.+]] = arith.index_cast %[[C24]] : index to i32
 //       CHECK32:   %[[SHIFTRT:.+]] = arith.shrsi %[[V]], %[[CAST]]
 //       CHECK32:   %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i8
+//  CHECK32-NEXT:   memref.dealloc %[[M]]
 //  CHECK32-NEXT:   return %[[TRUNC]]
 
 // -----
@@ -485,3 +488,68 @@ func.func @memref_collapse_shape_i4(%idx0 : index, %idx1 : index) -> i4 {
 //   CHECK32-NOT:     memref.collapse_shape
 //       CHECK32:     memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32>
 
+// -----
+
+func.func @memref_expand_shape_i4(%idx0 : index, %idx1 : index, %idx2 : index) -> i4 {
+  %arr = memref.alloc() : memref<256x128xi4>
+  %expand = memref.expand_shape %arr[[0, 1], [2]] output_shape [32, 8, 128] : memref<256x128xi4> into memref<32x8x128xi4>
+  %1 = memref.load %expand[%idx0, %idx1, %idx2] : memref<32x8x128xi4>
+  return %1 : i4
+}
+
+// CHECK-LABEL:   func.func @memref_expand_shape_i4(
+//       CHECK:     %[[ALLOC:.*]] = memref.alloc() : memref<16384xi8>
+//   CHECK-NOT:     memref.expand_shape
+//       CHECK:     memref.load %[[ALLOC]][%{{.*}}] : memref<16384xi8>
+
+// CHECK32-LABEL:   func.func @memref_expand_shape_i4(
+//       CHECK32:     %[[ALLOC:.*]] = memref.alloc() : memref<4096xi32>
+//   CHECK32-NOT:     memref.expand_shape
+//       CHECK32:     memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32>
+
+// -----
+
+func.func @memref_memory_space_cast_i4(%arg0: memref<32x128xi4, 1>) -> memref<32x128xi4> {
+  %cast = memref.memory_space_cast %arg0 : memref<32x128xi4, 1> to memref<32x128xi4>
+  return %cast : memref<32x128xi4>
+}
+
+// CHECK-LABEL:   func.func @memref_memory_space_cast_i4(
+//  CHECK-SAME:   %[[ARG0:.*]]: memref<2048xi8, 1>
+//       CHECK:     %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<2048xi8, 1> to memref<2048xi8>
+//       CHECK:     return %[[CAST]]
+
+// CHECK32-LABEL:   func.func @memref_memory_space_cast_i4(
+//  CHECK32-SAME:   %[[ARG0:.*]]: memref<512xi32, 1>
+//       CHECK32:     %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<512xi32, 1> to memref<512xi32>
+//       CHECK32:     return %[[CAST]]
+
+// -----
+
+func.func @memref_copy_i4(%arg0: memref<32x128xi4, 1>, %arg1: memref<32x128xi4>) {
+  memref.copy %arg0, %arg1 : memref<32x128xi4, 1> to memref<32x128xi4>
+  return
+}
+
+// CHECK-LABEL:   func.func @memref_copy_i4(
+//  CHECK-SAME:   %[[ARG0:.*]]: memref<2048xi8, 1>, %[[ARG1:.*]]: memref<2048xi8>
+//       CHECK:     memref.copy %[[ARG0]], %[[ARG1]]
+//       CHECK:     return
+
+// CHECK32-LABEL:   func.func @memref_copy_i4(
+//  CHECK32-SAME:   %[[ARG0:.*]]: memref<512xi32, 1>, %[[ARG1:.*]]: memref<512xi32>
+//       CHECK32:     memref.copy %[[ARG0]], %[[ARG1]]
+//       CHECK32:     return
+
+// -----
+
+!colMajor = memref<8x8xi4, strided<[1, 8]>>
+func.func @copy_distinct_layouts(%idx : index) -> i4 {
+  %c0 = arith.constant 0 : index
+  %arr = memref.alloc() : memref<8x8xi4>
+  %arr2 = memref.alloc() : !colMajor
+  // expected-error @+1 {{failed to legalize operation 'memref.copy' that was explicitly marked illegal}}
+  memref.copy %arr, %arr2 : memref<8x8xi4> to !colMajor
+  %ld = memref.load %arr2[%c0, %c0] : !colMajor
+  return %ld : i4
+}
diff --git a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
index d884ade3195329..8aac802ba10ae9 100644
--- a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
+++ b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir
@@ -1553,3 +1553,41 @@ func.func @extract_strided_metadata_of_collapse_shape(%base: memref<5x4xf32>)
 //   CHECK-DAG:    %[[STEP:.*]] = arith.constant 1 : index
 //       CHECK:    %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata
 //       CHECK:    return %[[BASE]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref<f32>, index, index, index
+
+// -----
+
+func.func @extract_strided_metadata_of_memory_space_cast(%base: memref<20xf32>)
+    -> (memref<f32, 1>, index, index, index) {
+
+  %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1>
+
+  %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast :
+    memref<20xf32, 1> -> memref<f32, 1>, index, index, index
+
+  return %base_buffer, %offset, %size, %stride :
+    memref<f32, 1>, index, index, index
+}
+
+// CHECK-LABEL:  func @extract_strided_metadata_of_memory_space_cast
+//   CHECK-DAG:    %[[OFFSET:.*]] = arith.constant 0 : index
+//   CHECK-DAG:    %[[SIZE:.*]] = arith.constant 20 : index
+//   CHECK-DAG:    %[[STEP:.*]] = arith.constant 1 : index
+//       CHECK:    %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata
+//       CHECK:    %[[CAST:.*]] = memref.memory_space_cast %[[BASE]]
+//       CHECK:    return %[[CAST]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref<f32, 1>, index, index, index
+
+// -----
+
+func.func @extract_strided_metadata_of_memory_space_cast_no_base(%base: memref<20xf32>)
+    -> (index, index, index) {
+
+  %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1>
+
+  %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast :
+    memref<20xf32, 1> -> memref<f32, 1>, index, index, index
+
+  return %offset, %size, %stride : index, index, index
+}
+
+// CHECK-LABEL:  func @extract_strided_metadata_of_memory_space_cast_no_base
+//   CHECK-NOT:  memref.memory_space_cast

From 564efe26745c5bb7236b095d5b42881cdc64a284 Mon Sep 17 00:00:00 2001
From: pcc <peter@pcc.me.uk>
Date: Wed, 14 Aug 2024 17:25:28 -0700
Subject: [PATCH 13/47] utils/git: Add linkify script.

This script linkifies (i.e. makes clickable in the terminal) text that appears
to be a pull request or issue reference (e.g. #12345 or PR12345) or a
40-character commit hash (e.g. abc123). You can configure git to automatically
send the output of commands that pipe their output through a pager, such as
`git log` and `git show`, through this script by running this command from
within your LLVM checkout:

git config core.pager 'llvm/utils/git/linkify | pager'

The pager command is run from the root of the repository even if the git
command is run from a subdirectory, so the relative path should always work.

It requires OSC 8 support in the terminal. For a list of compatible terminals,
see https://github.com/Alhadis/OSC8-Adoption

Reviewers: MaskRay

Reviewed By: MaskRay

Pull Request: https://github.com/llvm/llvm-project/pull/103496
---
 llvm/utils/git/linkify | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100755 llvm/utils/git/linkify

diff --git a/llvm/utils/git/linkify b/llvm/utils/git/linkify
new file mode 100755
index 00000000000000..9fcadd758492cf
--- /dev/null
+++ b/llvm/utils/git/linkify
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# This script linkifies (i.e. makes clickable in the terminal) text that appears
+# to be a pull request or issue reference (e.g. #12345 or PR12345) or a
+# 40-character commit hash (e.g. abc123). You can configure git to automatically
+# send the output of commands that pipe their output through a pager, such as
+# `git log` and `git show`, through this script by running this command from
+# within your LLVM checkout:
+#
+# git config core.pager 'llvm/utils/git/linkify | pager'
+#
+# The pager command is run from the root of the repository even if the git
+# command is run from a subdirectory, so the relative path should always work.
+#
+# It requires OSC 8 support in the terminal. For a list of compatible terminals,
+# see https://github.com/Alhadis/OSC8-Adoption
+
+sed \
+  -e 's,\(#\|\bPR\)\([0-9]\+\),\x1b]8;;https://github.com/llvm/llvm-project/issues/\2\x1b\\\0\x1b]8;;\x1b\\,gi' \
+  -e 's,[0-9a-f]\{40\},\x1b]8;;https://github.com/llvm/llvm-project/commit/\0\x1b\\\0\x1b]8;;\x1b\\,g'

From 7275919cd5fc89c42a52168c9f4411b4e5421c95 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 14 Aug 2024 17:46:30 -0700
Subject: [PATCH 14/47] Use clang_cc1 and specify the target explicitly.

---
 clang/test/CodeGen/overflow-idiom-exclusion-fp.c |  4 ++--
 clang/test/CodeGen/overflow-idiom-exclusion.c    | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
index d21405c56beab3..f0c4f874d59f4e 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
@@ -1,5 +1,5 @@
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s
 
 // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns
 extern unsigned a, b, c;
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c
index 7c8c4af61029de..da1203c074b19f 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion.c
@@ -1,8 +1,8 @@
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - | FileCheck %s --check-prefix=ADD
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - | FileCheck %s --check-prefix=NEGATE
-// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - | FileCheck %s --check-prefix=WHILE
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE
+// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE
 
 // Ensure some common overflow-dependent or overflow-prone code patterns don't
 // trigger the overflow sanitizers. In many cases, overflow warnings caused by

From 5873aa83b871393b5ada4c2033445d5fd52d7461 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 14 Aug 2024 17:47:47 -0700
Subject: [PATCH 15/47] Remove '-emit-llvm' and use '-triple'

---
 clang/test/CodeGen/overflow-idiom-exclusion-fp.c |  4 ++--
 clang/test/CodeGen/overflow-idiom-exclusion.c    | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
index f0c4f874d59f4e..1d26caa5b4f54b 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s
 
 // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns
 extern unsigned a, b, c;
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c
index da1203c074b19f..02dd3ef3ae42da 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE
-// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -o - %s | FileCheck %s --check-prefix=ADD
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -o - %s | FileCheck %s --check-prefix=NEGATE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -o - %s | FileCheck %s --check-prefix=WHILE
 
 // Ensure some common overflow-dependent or overflow-prone code patterns don't
 // trigger the overflow sanitizers. In many cases, overflow warnings caused by

From 4411d1e3926d67c393e6a7bdb910bbe77507ff26 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 14 Aug 2024 18:32:29 -0700
Subject: [PATCH 16/47] [sanitizer] Remove GetCurrentThread nullness checks
 from Allocate

The nullness check is unreachable.

* For the main thread and pthread_create created threads, the `*Allocate`
  functions must be called after `*_current_thread` is set.
* For threads created by Linux's `clone`, static TLS is either reused or
  set to a new value (CLONE_SETTLS).

Make this change for asan/msan and possibly extend the change to other
sanitizers. (asan supports many platforms and I am not 100% certain that
all platforms have the property.)

Pull Request: https://github.com/llvm/llvm-project/pull/102828
---
 compiler-rt/lib/asan/asan_allocator.cpp | 11 ++---------
 compiler-rt/lib/msan/msan_allocator.cpp | 11 ++---------
 2 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 9e66f77217ec6b..e041861edaf0b7 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -576,15 +576,8 @@ struct Allocator {
     }
 
     AsanThread *t = GetCurrentThread();
-    void *allocated;
-    if (t) {
-      AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-      allocated = allocator.Allocate(cache, needed_size, 8);
-    } else {
-      SpinMutexLock l(&fallback_mutex);
-      AllocatorCache *cache = &fallback_allocator_cache;
-      allocated = allocator.Allocate(cache, needed_size, 8);
-    }
+    void *allocated = allocator.Allocate(
+        GetAllocatorCache(&t->malloc_storage()), needed_size, 8);
     if (UNLIKELY(!allocated)) {
       SetAllocatorOutOfMemory();
       if (AllocatorMayReturnNull())
diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp
index d7d4967c949859..f478b9979f2daa 100644
--- a/compiler-rt/lib/msan/msan_allocator.cpp
+++ b/compiler-rt/lib/msan/msan_allocator.cpp
@@ -199,15 +199,8 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment,
     ReportRssLimitExceeded(stack);
   }
   MsanThread *t = GetCurrentThread();
-  void *allocated;
-  if (t) {
-    AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-    allocated = allocator.Allocate(cache, size, alignment);
-  } else {
-    SpinMutexLock l(&fallback_mutex);
-    AllocatorCache *cache = &fallback_allocator_cache;
-    allocated = allocator.Allocate(cache, size, alignment);
-  }
+  void *allocated = allocator.Allocate(GetAllocatorCache(&t->malloc_storage()),
+                                       size, alignment);
   if (UNLIKELY(!allocated)) {
     SetAllocatorOutOfMemory();
     if (AllocatorMayReturnNull())

From aca01bff07c225dbace6cb7743072ddfe78c43f0 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Wed, 14 Aug 2024 18:46:25 -0700
Subject: [PATCH 17/47] [ctx_prof] CtxProfAnalysis: populate module data
 (#102930)

Continuing from #102084, which introduced the analysis, we now populate
it with info about functions contained in the module.

When we update the profile due to e.g. inlined callsites, we'll ingest
the callee's counters and callsites into the caller. We'll move those into
the caller's respective index spaces (counters and callsites), so we need
to know and maintain where those currently end.

We also don't need to keep profiles not pertinent to this module.

This patch also introduces an arguably much simpler way to track the
GUID of a function from the frontend compilation, through ThinLTO, and
into the post-thinlink compilation step, which doesn't rely on keeping
names around. A separate RFC and patches will discuss extending this to
the current PGO (instrumented and sampled) and other consumers as an
infrastructural component.
---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  |  60 ++++++++-
 llvm/lib/Analysis/CtxProfAnalysis.cpp         |  93 +++++++++++++-
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   4 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../Instrumentation/PGOCtxProfLowering.cpp    |   6 +-
 .../Analysis/CtxProfAnalysis/full-cycle.ll    | 119 ++++++++++++++++++
 llvm/test/Analysis/CtxProfAnalysis/load.ll    | 113 +++++++++++++----
 .../PGOProfile/ctx-instrumentation.ll         |  26 ++--
 .../PGOProfile/ctx-prof-use-prelink.ll        |   4 +-
 9 files changed, 385 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index d77c81d03582e1..f0e2aeb0f92f74 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -9,10 +9,10 @@
 #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H
 #define LLVM_ANALYSIS_CTXPROFANALYSIS_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
-#include <map>
 
 namespace llvm {
 
@@ -20,12 +20,28 @@ class CtxProfAnalysis;
 
 /// The instrumented contextual profile, produced by the CtxProfAnalysis.
 class PGOContextualProfile {
+  friend class CtxProfAnalysis;
+  friend class CtxProfAnalysisPrinterPass;
+  struct FunctionInfo {
+    uint32_t NextCounterIndex = 0;
+    uint32_t NextCallsiteIndex = 0;
+    const std::string Name;
+
+    FunctionInfo(StringRef Name) : Name(Name) {}
+  };
   std::optional<PGOCtxProfContext::CallTargetMapTy> Profiles;
+  // For the GUIDs in this module, associate metadata about each function which
+  // we'll need when we maintain the profiles during IPO transformations.
+  DenseMap<GlobalValue::GUID, FunctionInfo> FuncInfo;
 
-public:
-  explicit PGOContextualProfile(PGOCtxProfContext::CallTargetMapTy &&Profiles)
-      : Profiles(std::move(Profiles)) {}
+  /// Get the GUID of this Function if it's defined in this module.
+  GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const;
+
+  // This is meant to be constructed from CtxProfAnalysis, which will also set
+  // its state piecemeal.
   PGOContextualProfile() = default;
+
+public:
   PGOContextualProfile(const PGOContextualProfile &) = delete;
   PGOContextualProfile(PGOContextualProfile &&) = default;
 
@@ -35,6 +51,20 @@ class PGOContextualProfile {
     return *Profiles;
   }
 
+  bool isFunctionKnown(const Function &F) const {
+    return getDefinedFunctionGUID(F) != 0;
+  }
+
+  uint32_t allocateNextCounterIndex(const Function &F) {
+    assert(isFunctionKnown(F));
+    return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++;
+  }
+
+  uint32_t allocateNextCallsiteIndex(const Function &F) {
+    assert(isFunctionKnown(F));
+    return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++;
+  }
+
   bool invalidate(Module &, const PreservedAnalyses &PA,
                   ModuleAnalysisManager::Invalidator &) {
     // Check whether the analysis has been explicitly invalidated. Otherwise,
@@ -66,5 +96,27 @@ class CtxProfAnalysisPrinterPass
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
   static bool isRequired() { return true; }
 };
+
+/// Assign a GUID to functions as metadata. GUID calculation takes linkage into
+/// account, which may change especially through and after thinlto. By
+/// pre-computing and assigning as metadata, this mechanism is resilient to such
+/// changes (as well as name changes e.g. suffix ".llvm." additions).
+
+// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early in
+// the pass pipeline, associate it with any Global Value, and then use it for
+// PGO and ThinLTO.
+// At that point, this should be moved elsewhere.
+class AssignGUIDPass : public PassInfoMixin<AssignGUIDPass> {
+public:
+  explicit AssignGUIDPass() = default;
+
+  /// Assign a GUID *if* one is not already assigned, as a function metadata named
+  /// `GUIDMetadataName`.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+  static const char *GUIDMetadataName;
+  // This should become GlobalValue::getGUID
+  static uint64_t getGUID(const Function &F);
+};
+
 } // namespace llvm
 #endif // LLVM_ANALYSIS_CTXPROFANALYSIS_H
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index fbae705127538a..5bf336dd311158 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -14,12 +14,14 @@
 #include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/IR/Analysis.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 
 #define DEBUG_TYPE "ctx_prof"
 
@@ -64,10 +66,39 @@ Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) {
 } // namespace json
 } // namespace llvm
 
+const char *AssignGUIDPass::GUIDMetadataName = "guid";
+
+PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  for (auto &F : M.functions()) {
+    if (F.isDeclaration())
+      continue;
+    if (F.getMetadata(GUIDMetadataName))
+      continue;
+    const GlobalValue::GUID GUID = F.getGUID();
+    F.setMetadata(GUIDMetadataName,
+                  MDNode::get(M.getContext(),
+                              {ConstantAsMetadata::get(ConstantInt::get(
+                                  Type::getInt64Ty(M.getContext()), GUID))}));
+  }
+  return PreservedAnalyses::none();
+}
+
+GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {
+  if (F.isDeclaration()) {
+    assert(GlobalValue::isExternalLinkage(F.getLinkage()));
+    return GlobalValue::getGUID(F.getGlobalIdentifier());
+  }
+  auto *MD = F.getMetadata(GUIDMetadataName);
+  assert(MD && "guid not found for defined function");
+  return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
+                               ->getValue()
+                               ->stripPointerCasts())
+      ->getZExtValue();
+}
 AnalysisKey CtxProfAnalysis::Key;
 
-CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M,
-                                             ModuleAnalysisManager &MAM) {
+PGOContextualProfile CtxProfAnalysis::run(Module &M,
+                                          ModuleAnalysisManager &MAM) {
   ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFile(Profile);
   if (auto EC = MB.getError()) {
     M.getContext().emitError("could not open contextual profile file: " +
@@ -81,7 +112,55 @@ CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M,
                              toString(MaybeCtx.takeError()));
     return {};
   }
-  return Result(std::move(*MaybeCtx));
+
+  PGOContextualProfile Result;
+
+  for (const auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    auto GUID = AssignGUIDPass::getGUID(F);
+    assert(GUID && "guid not found for defined function");
+    const auto &Entry = F.begin();
+    uint32_t MaxCounters = 0; // we expect at least a counter.
+    for (const auto &I : *Entry)
+      if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
+        MaxCounters =
+            static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
+        break;
+      }
+    if (!MaxCounters)
+      continue;
+    uint32_t MaxCallsites = 0;
+    for (const auto &BB : F)
+      for (const auto &I : BB)
+        if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
+          MaxCallsites =
+              static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
+          break;
+        }
+    auto [It, Ins] = Result.FuncInfo.insert(
+        {GUID, PGOContextualProfile::FunctionInfo(F.getName())});
+    (void)Ins;
+    assert(Ins);
+    It->second.NextCallsiteIndex = MaxCallsites;
+    It->second.NextCounterIndex = MaxCounters;
+  }
+  // If we made it this far, the Result is valid - which we mark by setting
+  // .Profiles.
+  // Trim first the roots that aren't in this module.
+  DenseSet<GlobalValue::GUID> ProfiledGUIDs;
+  for (auto &[RootGuid, _] : llvm::make_early_inc_range(*MaybeCtx))
+    if (!Result.FuncInfo.contains(RootGuid))
+      MaybeCtx->erase(RootGuid);
+  Result.Profiles = std::move(*MaybeCtx);
+  return Result;
+}
+
+GlobalValue::GUID
+PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
+  if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
+    return It->first;
+  return 0;
 }
 
 PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
@@ -91,8 +170,16 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
     M.getContext().emitError("Invalid CtxProfAnalysis");
     return PreservedAnalyses::all();
   }
+
+  OS << "Function Info:\n";
+  for (const auto &[Guid, FuncInfo] : C.FuncInfo)
+    OS << Guid << " : " << FuncInfo.Name
+       << ". MaxCounterID: " << FuncInfo.NextCounterIndex
+       << ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
+
   const auto JSONed = ::llvm::json::toJSON(C.profiles());
 
+  OS << "\nCurrent Profile:\n";
   OS << formatv("{0:2}", JSONed);
   OS << "\n";
   return PreservedAnalyses::all();
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 6927a2886b962b..0201e69f3e216a 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -1196,6 +1197,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
     // In pre-link, we just want the instrumented IR. We use the contextual
     // profile in the post-thinlink phase.
     // The instrumentation will be removed in post-thinlink after IPO.
+    // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this
+    // mechanism for GUIDs.
+    MPM.addPass(AssignGUIDPass());
     if (IsCtxProfUse)
       return MPM;
     addPostPGOLoopRotation(MPM, Level);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 4fdded7b82f36b..18f4aa19224da0 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -46,6 +46,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
 #endif
 MODULE_PASS("always-inline", AlwaysInlinerPass())
 MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
+MODULE_PASS("assign-guid", AssignGUIDPass())
 MODULE_PASS("attributor", AttributorPass())
 MODULE_PASS("attributor-light", AttributorLightPass())
 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index d6ba12465bb328..9b10cbba84075a 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -8,6 +8,7 @@
 //
 
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/Analysis.h"
 #include "llvm/IR/DiagnosticInfo.h"
@@ -16,6 +17,7 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/CommandLine.h"
 #include <utility>
 
@@ -223,8 +225,8 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) {
       assert(Mark->getIndex()->isZero());
 
       IRBuilder<> Builder(Mark);
-      // FIXME(mtrofin): use InstrProfSymtab::getCanonicalName
-      Guid = Builder.getInt64(F.getGUID());
+
+      Guid = Builder.getInt64(AssignGUIDPass::getGUID(F));
       // The type of the context of this function is now knowable since we have
       // NrCallsites and NrCounters. We delcare it here because it's more
       // convenient - we have the Builder.
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
new file mode 100644
index 00000000000000..0cdf82bd96efcb
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll
@@ -0,0 +1,119 @@
+; REQUIRES: x86_64-linux
+;
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+;
+; Test that the GUID metadata survives through thinlink.
+;
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+;
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m1.bc %t/m1.ll
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m2.bc %t/m2.ll
+;
+; RUN: rm -rf %t/postlink
+; RUN: mkdir %t/postlink
+;
+;
+; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc -o %t/ -thinlto-distributed-indexes \
+; RUN:  -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:  -r %t/m1.bc,f1,plx \
+; RUN:  -r %t/m2.bc,f1 \
+; RUN:  -r %t/m2.bc,entrypoint,plx
+; RUN: opt --passes='function-import,require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
+; RUN:  -summary-file=%t/m2.bc.thinlto.bc -use-ctx-profile=%t/profile.ctxprofdata %t/m2.bc \
+; RUN:  -S -o %t/m2.post.ll 2> %t/profile.txt
+; RUN: diff %t/expected.txt %t/profile.txt
+;--- m1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+source_filename = "random_path/m1.cc"
+
+define private void @f2() #0 !guid !0 {
+  ret void
+}
+
+define void @f1() #0 {
+  call void @f2()
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 3087265239403591524 }
+
+;--- m2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+source_filename = "random_path/m2.cc"
+
+declare void @f1()
+
+define void @entrypoint() {
+  call void @f1()
+  ret void
+}
+;--- profile.json
+[
+  {
+    "Callsites": [
+      [
+        {
+          "Callsites": [
+            [
+              {
+                "Counters": [
+                  10
+                ],
+                "Guid": 3087265239403591524
+              }
+            ]
+          ],
+          "Counters": [
+            7
+          ],
+          "Guid": 2072045998141807037
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 10507721908651011566
+  }
+]
+;--- expected.txt
+Function Info:
+10507721908651011566 : entrypoint. MaxCounterID: 1. MaxCallsiteID: 1
+3087265239403591524 : f2.llvm.0. MaxCounterID: 1. MaxCallsiteID: 0
+2072045998141807037 : f1. MaxCounterID: 1. MaxCallsiteID: 1
+
+Current Profile:
+[
+  {
+    "Callsites": [
+      [
+        {
+          "Callsites": [
+            [
+              {
+                "Counters": [
+                  10
+                ],
+                "Guid": 3087265239403591524
+              }
+            ]
+          ],
+          "Counters": [
+            7
+          ],
+          "Guid": 2072045998141807037
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 10507721908651011566
+  }
+]
diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll
index 9cd78cfef187ba..69806e334aaec9 100644
--- a/llvm/test/Analysis/CtxProfAnalysis/load.ll
+++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll
@@ -1,16 +1,22 @@
 ; REQUIRES: x86_64-linux
-
+;
+; RUN: rm -rf %t
 ; RUN: split-file %s %t
 ; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
 ; RUN: not opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
+; RUN:   %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
 
 ; RUN: not opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   -use-ctx-profile=does_not_exist.ctxprofdata %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
+; RUN:   -use-ctx-profile=does_not_exist.ctxprofdata %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE
 
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' \
+; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/example.ll -S -o %t/prelink.ll
+
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:  %t/example.ll -S -o %t/prelink.ll
 ; RUN: opt -passes='require<ctx-prof-analysis>,print<ctx-prof-analysis>' \
-; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/empty.ll -S 2> %t/output.json
-; RUN: diff %t/profile.json %t/output.json
+; RUN:   -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S 2> %t/output.txt
+; RUN: diff %t/expected-profile-output.txt %t/output.txt
 
 ; NO-FILE: error: could not open contextual profile file
 ;
@@ -18,41 +24,104 @@
 ; output it from opt.
 ;--- profile.json
 [
+  {
+    "Counters": [
+      9
+    ],
+    "Guid": 12341
+  },
+  {
+    "Counters": [
+      5
+    ],
+    "Guid": 12074870348631550642
+  },
   {
     "Callsites": [
-      [],
       [
         {
           "Counters": [
-            4,
-            5
+            6,
+            7
           ],
-          "Guid": 2000
-        },
+          "Guid": 728453322856651412
+        }
+      ]
+    ],
+    "Counters": [
+      1
+    ],
+    "Guid": 11872291593386833696
+  }
+]
+;--- expected-profile-output.txt
+Function Info:
+4909520559318251808 : an_entrypoint. MaxCounterID: 2. MaxCallsiteID: 1
+12074870348631550642 : another_entrypoint_no_callees. MaxCounterID: 1. MaxCallsiteID: 0
+11872291593386833696 : foo. MaxCounterID: 1. MaxCallsiteID: 1
+
+Current Profile:
+[
+  {
+    "Callsites": [
+      [
         {
           "Counters": [
             6,
-            7,
-            8
+            7
           ],
-          "Guid": 18446744073709551613
+          "Guid": 728453322856651412
         }
       ]
     ],
     "Counters": [
-      1,
-      2,
-      3
+      1
     ],
-    "Guid": 1000
+    "Guid": 11872291593386833696
   },
   {
     "Counters": [
-      5,
-      9,
-      10
+      5
     ],
-    "Guid": 18446744073709551612
+    "Guid": 12074870348631550642
   }
 ]
-;--- empty.ll
+;--- example.ll
+declare void @bar()
+
+define private void @foo(i32 %a, ptr %fct) #0 !guid !0 {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}
+
+define void @an_entrypoint(i32 %a) {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+
+yes:
+  call void @foo(i32 1, ptr null)
+  ret void
+no:
+  ret void
+}
+
+define void @another_entrypoint_no_callees(i32 %a) {
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+
+yes:
+  ret void
+no:
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 11872291593386833696 }
\ No newline at end of file
diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
index 56c7c7519f6945..a70f94e1521f0d 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
 ; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \
 ; RUN:   -S < %s | FileCheck --check-prefix=INSTRUMENT %s
-; RUN: opt -passes=pgo-instr-gen,ctx-instr-lower -profile-context-root=an_entrypoint \
+; RUN: opt -passes=pgo-instr-gen,assign-guid,ctx-instr-lower -profile-context-root=an_entrypoint \
 ; RUN:   -profile-context-root=another_entrypoint_no_callees \
 ; RUN:   -S < %s | FileCheck --check-prefix=LOWERING %s
 
@@ -46,7 +46,7 @@ define void @foo(i32 %a, ptr %fct) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @foo(
-; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) !guid [[META0:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @foo, i64 6699318081062747564, i32 2, i32 2)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -104,7 +104,7 @@ define void @an_entrypoint(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @an_entrypoint(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -154,7 +154,7 @@ define void @another_entrypoint_no_callees(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @another_entrypoint_no_callees(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -188,7 +188,7 @@ define void @simple(i32 %a) {
 ; INSTRUMENT-NEXT:    ret void
 ;
 ; LOWERING-LABEL: define void @simple(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META3:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @simple, i64 -3006003237940970099, i32 1, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -212,7 +212,7 @@ define i32 @no_callsites(i32 %a) {
 ; INSTRUMENT-NEXT:    ret i32 0
 ;
 ; LOWERING-LABEL: define i32 @no_callsites(
-; LOWERING-SAME: i32 [[A:%.*]]) {
+; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], -2
@@ -243,7 +243,8 @@ define void @no_counters() {
 ; INSTRUMENT-NEXT:    call void @bar()
 ; INSTRUMENT-NEXT:    ret void
 ;
-; LOWERING-LABEL: define void @no_counters() {
+; LOWERING-LABEL: define void @no_counters(
+; LOWERING-SAME: ) !guid [[META5:![0-9]+]] {
 ; LOWERING-NEXT:    [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_counters, i64 5458232184388660970, i32 1, i32 1)
 ; LOWERING-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
 ; LOWERING-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 1
@@ -263,8 +264,15 @@ define void @no_counters() {
   ret void
 }
 ;.
-; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
-;.
 ; LOWERING: attributes #[[ATTR0:[0-9]+]] = { nounwind }
 ; LOWERING: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.
+; LOWERING: [[META0]] = !{i64 6699318081062747564}
+; LOWERING: [[META1]] = !{i64 4909520559318251808}
+; LOWERING: [[META2]] = !{i64 -6371873725078000974}
+; LOWERING: [[META3]] = !{i64 -3006003237940970099}
+; LOWERING: [[META4]] = !{i64 5679753335911435902}
+; LOWERING: [[META5]] = !{i64 5458232184388660970}
+;.
diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
index 18ac2f92aa39d4..cb8ab78dc0f414 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -12,7 +12,7 @@ declare void @bar()
 ;.
 define void @foo(i32 %a, ptr %fct) {
 ; CHECK-LABEL: define void @foo(
-; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr !guid [[META0:![0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
 ; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[A]], 0
 ; CHECK-NEXT:    br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
@@ -42,3 +42,5 @@ exit:
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
 ;.
+; CHECK: [[META0]] = !{i64 6699318081062747564}
+;.

From bd47ba705607033c3dab0037a8d64159b58bedb4 Mon Sep 17 00:00:00 2001
From: Jessica Clarke <jrtc27@jrtc27.com>
Date: Thu, 15 Aug 2024 02:50:07 +0100
Subject: [PATCH 18/47] [ELF][NFC] Allow non-GotSection for
 addAddendOnlyRelocIfNonPreemptible (#104228)

This was done as an afterthought in c3c9e4531287 without justification.
Nothing relies on it being a specific kind of section, and downstream in
CHERI LLVM we pass a non-GotSection to this function. Thus revert this
overly-restrictive change and allow downstreams to pass other section
types again.

This partially reverts commit c3c9e45312874ff890723f54cabfd41e43b2dbc4.
---
 lld/ELF/SyntheticSections.cpp | 6 +++---
 lld/ELF/SyntheticSections.h   | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index c27ab2b67dc2b2..7d26fa9aea74ab 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1639,14 +1639,14 @@ void RelocationBaseSection::addSymbolReloc(
 }
 
 void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
-    RelType dynType, GotSection &sec, uint64_t offsetInSec, Symbol &sym,
+    RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
     RelType addendRelType) {
   // No need to write an addend to the section for preemptible symbols.
   if (sym.isPreemptible)
-    addReloc({dynType, &sec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0,
+    addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0,
               R_ABS});
   else
-    addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, sec, offsetInSec,
+    addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec,
              sym, 0, R_ABS, addendRelType);
 }
 
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
index d4169e1e1acaf6..43eb82cbb3e28b 100644
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -523,7 +523,8 @@ class RelocationBaseSection : public SyntheticSection {
   }
   /// Add a dynamic relocation using the target address of \p sym as the addend
   /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
-  void addAddendOnlyRelocIfNonPreemptible(RelType dynType, GotSection &sec,
+  void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
+                                          InputSectionBase &isec,
                                           uint64_t offsetInSec, Symbol &sym,
                                           RelType addendRelType);
   template <bool shard = false>

From 0df91893efc752a76c7bbe6b063d66c8a2fa0d55 Mon Sep 17 00:00:00 2001
From: alx32 <103613512+alx32@users.noreply.github.com>
Date: Wed, 14 Aug 2024 19:30:41 -0700
Subject: [PATCH 19/47] [lld-macho] Fix crash: ObjC category merge + relative
 method lists (#104081)

A crash was happening when both ObjC Category Merging and Relative
method lists were enabled.

ObjC Category Merging creates new data sections and adds them by calling
`addInputSection`. `addInputSection` uses the symbols within the added
section to determine which container to actually add the section to.

The issue is that ObjC Category merging is calling `addInputSection`
before actually adding the relevant symbols to the added section. This
causes `addInputSection` to add the `InputSection` to the wrong
container, eventually resulting in a crash.

To fix this, we ensure that ObjC Category Merging calls
`addInputSection` only after the symbols have been added to the
`InputSection`.
---
 lld/MachO/ObjC.cpp                            | 10 +++++-----
 .../MachO/objc-category-merging-minimal.s     | 20 +++++++++----------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp
index 18b7521ed1ad2a..b9f7592fa9c663 100644
--- a/lld/MachO/ObjC.cpp
+++ b/lld/MachO/ObjC.cpp
@@ -851,7 +851,6 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -867,6 +866,7 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -911,7 +911,6 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
       infoCategoryWriter.catPtrListInfo.align);
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
   listSec->live = true;
-  addInputSection(listSec);
 
   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
 
@@ -927,6 +926,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList(
 
   ptrListSym->used = true;
   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
+  addInputSection(listSec);
 
   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -952,7 +952,6 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
                                bodyData, infoCategoryWriter.catListInfo.align);
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
   newCatList->live = true;
-  addInputSection(newCatList);
 
   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
 
@@ -968,6 +967,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
 
   catListSym->used = true;
   objFile->symbols.push_back(catListSym);
+  addInputSection(newCatList);
   return catListSym;
 }
 
@@ -990,7 +990,6 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
                                bodyData, infoCategoryWriter.catBodyInfo.align);
   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
   newBodySec->live = true;
-  addInputSection(newBodySec);
 
   std::string symName =
       objc::symbol_names::category + baseClassName + "(" + name + ")";
@@ -1003,6 +1002,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
 
   catBodySym->used = true;
   objFile->symbols.push_back(catBodySym);
+  addInputSection(newBodySec);
 
   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
                         infoCategoryWriter.catBodyInfo.relocTemplate);
@@ -1223,7 +1223,6 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
           infoCategoryWriter.catListInfo.align);
       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
       listSec->live = true;
-      addInputSection(listSec);
 
       std::string slotSymName = "<__objc_catlist slot for category ";
       slotSymName += nonErasedCatBody->getName();
@@ -1238,6 +1237,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories(
 
       catListSlotSym->used = true;
       objFile->symbols.push_back(catListSlotSym);
+      addInputSection(listSec);
 
       // Now link the category body into the newly created slot
       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
diff --git a/lld/test/MachO/objc-category-merging-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s
index 527493303c583e..b94799a57a4d85 100644
--- a/lld/test/MachO/objc-category-merging-minimal.s
+++ b/lld/test/MachO/objc-category-merging-minimal.s
@@ -9,7 +9,7 @@
 ## Create our main testing dylib - linking against the fake dylib above
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal.o merge_cat_minimal.s
 # RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_no_merge.dylib a64_fakedylib.dylib merge_cat_minimal.o
-# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
+# RUN: %lld -objc_relative_method_lists -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o
 
 ## Now verify that the flag caused category merging to happen appropriatelly
 # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS
@@ -17,7 +17,7 @@
 
 ############ Test merging multiple categories into the base class ############
 # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s
-# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
+# RUN: %lld -arch arm64 -dylib -objc_relative_method_lists -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o
 # RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o
 
 # RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib  | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE
@@ -37,14 +37,14 @@ MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02)
 MERGE_CATS-NEXT:   name {{.*}} Category01|Category02
 MERGE_CATS:       instanceMethods
-MERGE_CATS-NEXT:  24
-MERGE_CATS-NEXT:  2
+MERGE_CATS-NEXT:  entsize 12 (relative)
+MERGE_CATS-NEXT:  count 2
 MERGE_CATS-NEXT:   name {{.*}} cat01_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 MERGE_CATS-NEXT:   name {{.*}} cat02_InstanceMethod
 MERGE_CATS-NEXT:  types {{.*}} v16@0:8
-MERGE_CATS-NEXT:    imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+MERGE_CATS-NEXT:    imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 MERGE_CATS-NEXT:         classMethods 0x0
 MERGE_CATS-NEXT:            protocols 0x0
 MERGE_CATS-NEXT:   instanceProperties 0x0
@@ -69,17 +69,17 @@ YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02
 YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass
 YES_MERGE_INTO_BASE: baseMethods
-YES_MERGE_INTO_BASE-NEXT: entsize 24
+YES_MERGE_INTO_BASE-NEXT: entsize 12 (relative)
 YES_MERGE_INTO_BASE-NEXT: count 3
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod]
 YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod
 YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8
-YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod]
+YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass baseInstanceMethod]
 
 
 #### Check merge swift category into base class ###

From abaa53199ed03b2e9de9fd373cbcfcc88e5348ff Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng@spacemit.com>
Date: Thu, 15 Aug 2024 10:37:04 +0800
Subject: [PATCH 20/47] [RISCV] Implement
 RISCVTTIImpl::shouldConsiderAddressTypePromotion for RISCV (#102560)

This optimization helps reduce repeated calculations of base addresses
by extracting type extensions when the same base address is accessed
multiple times but its offset is a constant.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  1 +
 .../Target/RISCV/RISCVTargetTransformInfo.cpp | 32 +++++++
 .../Target/RISCV/RISCVTargetTransformInfo.h   |  4 +-
 .../RISCV/riscv-codegen-prepare-atp.ll        | 95 +++++++++++++++++++
 4 files changed, 131 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 02f48d41b56b3c..911fa45d7173e8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1428,6 +1428,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   // Disable strict node mutation.
   IsStrictFPEnabled = true;
+  EnableExtLdPromotion = true;
 
   // Let the subtarget decide if a predictable select is more expensive than the
   // corresponding branch. This information is used in CGP/SelectOpt to decide
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 85683c62064435..781e3d7929aa43 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -2000,3 +2000,35 @@ bool RISCVTTIImpl::areInlineCompatible(const Function *Caller,
   // target-features.
   return (CallerBits & CalleeBits) == CalleeBits;
 }
+
+/// See if \p I should be considered for address type promotion. We check if \p
+/// I is a sext with right type and used in memory accesses. If it is used in a
+/// "complex" getelementptr, we allow it to be promoted without finding other
+/// sext instructions that sign extended the same initial value. A getelementptr
+/// is considered as "complex" if it has more than 2 operands.
+bool RISCVTTIImpl::shouldConsiderAddressTypePromotion(
+    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+  bool Considerable = false;
+  AllowPromotionWithoutCommonHeader = false;
+  if (!isa<SExtInst>(&I))
+    return false;
+  Type *ConsideredSExtType =
+      Type::getInt64Ty(I.getParent()->getParent()->getContext());
+  if (I.getType() != ConsideredSExtType)
+    return false;
+  // See if the sext is the one with the right type and used in at least one
+  // GetElementPtrInst.
+  for (const User *U : I.users()) {
+    if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+      Considerable = true;
+      // A getelementptr is considered as "complex" if it has more than 2
+      // operands. We will promote a SExt used in such complex GEP as we
+      // expect some computation to be merged if they are done on 64 bits.
+      if (GEPInst->getNumOperands() > 2) {
+        AllowPromotionWithoutCommonHeader = true;
+        break;
+      }
+    }
+  }
+  return Considerable;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d04..f5eca2839acd05 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -397,7 +397,9 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool shouldFoldTerminatingConditionAfterLSR() const {
     return true;
   }
-
+  bool
+  shouldConsiderAddressTypePromotion(const Instruction &I,
+                                     bool &AllowPromotionWithoutCommonHeader);
   std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll
new file mode 100644
index 00000000000000..b733c6a1c787ba
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -S | FileCheck %s
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "riscv64"
+
+%struct.match_state = type { i64, i64  }
+
+; %add is also promoted by forking an extra sext.
+define void @promoteTwoOne(i32 %i, i32 %j, ptr %P1, ptr %P2 ) {
+; CHECK-LABEL: define void @promoteTwoOne(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[S2:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[PROMOTED2:%.*]] = sext i32 [[J]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[S2]], [[PROMOTED2]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
+; CHECK-NEXT:    store i64 [[S]], ptr [[ADDR1]], align 8
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
+; CHECK-NEXT:    store i64 [[S2]], ptr [[ADDR2]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add = add nsw i32 %i, %j
+  %s = sext i32 %add to i64
+  %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
+  store i64 %s, ptr %addr1
+  %s2 = sext i32 %i to i64
+  %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
+  store i64 %s2, ptr %addr2
+  ret void
+}
+
+; Both %add1 and %add2 are promoted by forking extra sexts.
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, ptr %P1, ptr %P2) {
+; CHECK-LABEL: define void @promoteTwoTwo(
+; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], i32 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PROMOTED3:%.*]] = sext i32 [[J]] to i64
+; CHECK-NEXT:    [[PROMOTED4:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED4]]
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]]
+; CHECK-NEXT:    store i64 [[S]], ptr [[ADDR1]], align 8
+; CHECK-NEXT:    [[PROMOTED2:%.*]] = sext i32 [[K]] to i64
+; CHECK-NEXT:    [[S2:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]]
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]]
+; CHECK-NEXT:    store i64 [[S2]], ptr [[ADDR2]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %add1 = add nsw i32 %j, %i
+  %s = sext i32 %add1 to i64
+  %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s
+  store i64 %s, ptr %addr1
+  %add2 = add nsw i32 %j, %k
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2
+  store i64 %s2, ptr %addr2
+  ret void
+}
+
+define i64 @promoteGEPSunk(i1 %cond, ptr %base, i32 %i) {
+; CHECK-LABEL: define i64 @promoteGEPSunk(
+; CHECK-SAME: i1 [[COND:%.*]], ptr [[BASE:%.*]], i32 [[I:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PROMOTED1:%.*]] = sext i32 [[I]] to i64
+; CHECK-NEXT:    [[S:%.*]] = add nsw i64 [[PROMOTED1]], 1
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S]]
+; CHECK-NEXT:    [[S2:%.*]] = add nsw i64 [[PROMOTED1]], 2
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S2]]
+; CHECK-NEXT:    br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_THEN2:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[ADDR]], align 8
+; CHECK-NEXT:    [[V2:%.*]] = load i64, ptr [[ADDR2]], align 8
+; CHECK-NEXT:    [[R:%.*]] = add i64 [[V]], [[V2]]
+; CHECK-NEXT:    ret i64 [[R]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    ret i64 0
+;
+entry:
+  %add = add nsw i32 %i, 1
+  %s = sext i32 %add to i64
+  %addr = getelementptr inbounds i64, ptr %base, i64 %s
+  %add2 = add nsw i32 %i,  2
+  %s2 = sext i32 %add2 to i64
+  %addr2 = getelementptr inbounds i64, ptr %base, i64 %s2
+  br i1 %cond, label %if.then, label %if.then2
+if.then:
+  %v = load i64, ptr %addr
+  %v2 = load i64, ptr %addr2
+  %r = add i64 %v, %v2
+  ret i64 %r
+if.then2:
+  ret i64 0;
+}

From b57038a611329ec42858b714effb482cbfc4d4e1 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Wed, 14 Aug 2024 20:03:45 -0700
Subject: [PATCH 21/47] [OpenMP] Use range-based for loops (NFC) (#103511)

---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 83fec194d73904..f9b070e6f1eae4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2022,8 +2022,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
           Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
     }
 
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
 
   addOutlineInfo(std::move(OI));
@@ -7049,8 +7049,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
     }
 
     StaleCI->eraseFromParent();
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
   addOutlineInfo(std::move(OI));
 
@@ -8345,9 +8345,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
                            omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
                        Args);
 
-    llvm::for_each(llvm::reverse(ToBeDeleted),
-                   [](Instruction *I) { I->eraseFromParent(); });
-
+    for (Instruction *I : llvm::reverse(ToBeDeleted))
+      I->eraseFromParent();
   };
 
   if (!Config.isTargetDevice())

From b4dc9869381f91af419ec170837ac324d09525e5 Mon Sep 17 00:00:00 2001
From: Daniel Wedzicha <55595431+boredhuman@users.noreply.github.com>
Date: Thu, 15 Aug 2024 00:23:40 -0400
Subject: [PATCH 22/47] [LLDB][OSX] Removed semicolon generating a warning
 during build (#104398)

Fixes a single warning I noticed when compiling lldb.

Co-authored-by: Daniel <d.wedzicha@efg.gg>
---
 lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
index b714f7be187aca..d27bd1b7426e6c 100644
--- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
+++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm
@@ -128,7 +128,7 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) {
 static bool ResolveAndVerifyCandidateSupportDir(FileSpec &path) {
   FileSystem::Instance().Resolve(path);
   return FileSystem::Instance().IsDirectory(path);
-};
+}
 
 bool HostInfoMacOSX::ComputeSupportExeDirectory(FileSpec &file_spec) {
   FileSpec lldb_file_spec = GetShlibDir();

From e0d173d44161bf9b68243845666d58999e74f759 Mon Sep 17 00:00:00 2001
From: Max Winkler <max.enrico.winkler@gmail.com>
Date: Wed, 14 Aug 2024 21:51:57 -0700
Subject: [PATCH 23/47] [Clang] [AST] Fix placeholder return type name mangling
 for MSVC 1920+ / VS2019+ (#102848)

Partial fix for https://github.com/llvm/llvm-project/issues/92204.
This PR just fixes VS2019+ since that is the suite of compilers that I
require link compatibility with at the moment.
I still intend to fix VS2017 and to update llvm-undname in future PRs.
Once those are also finished and merged I'll close out
https://github.com/llvm/llvm-project/issues/92204.
I am hoping to get the llvm-undname PR up in a couple of weeks to be
able to demangle the VS2019+ name mangling.

MSVC 1920+ mangles placeholder return types for non-templated functions
with "@".
For example `auto foo() { return 0; }` is mangled as `?foo@@YA@XZ`.

MSVC 1920+ mangles placeholder return types for templated functions as
the qualifiers of the AutoType followed by "_P" for `auto` and "_T" for
`decltype(auto)`.
For example `template<class T> auto foo() { return 0; }` is mangled as
`??$foo@H@@YA?A_PXZ` when `foo` is instantiated as follows `foo<int>()`.

Lambdas with placeholder return types are still mangled with clang's
custom mangling since MSVC lambda mangling hasn't been deciphered yet.
Similarly any pointers in the return type with an address space are
mangled with clang's custom mangling since that is a clang extension.

We cannot augment `mangleType` to support this mangling scheme as the
mangling schemes for variables and functions differ.
`auto` variables are encoded with the fully deduced type, whereas `auto`
return types are not.
The following two functions with a static variable are mangled the same:
```
template<class T>
int test()
{
    static int i = 0; // "?i@?1???$test@H@@YAHXZ@4HA"
    return i;
}

template<class T>
int test()
{
    static auto i = 0; // "?i@?1???$test@H@@YAHXZ@4HA"
    return i;
}
```
Inside `mangleType`, once we get to mangling the `AutoType`, we cannot tell
whether we came from a variable encoding or some other encoding.
Therefore it was easier to handle any special casing for `AutoType`
return types with a separate function instead of using the `mangleType`
infrastructure.
---
 clang/docs/ReleaseNotes.rst                   |   2 +
 clang/lib/AST/MicrosoftMangle.cpp             | 161 +++++++-
 .../test/CodeGenCXX/mangle-ms-auto-return.cpp | 369 ++++++++++++++++++
 .../mangle-ms-auto-templates-memptrs.cpp      |  12 +-
 .../mangle-ms-auto-templates-nullptr.cpp      |   2 +-
 .../CodeGenCXX/mangle-ms-auto-templates.cpp   |   6 +-
 6 files changed, 533 insertions(+), 19 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/mangle-ms-auto-return.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index f5696d6ce15dc7..b1864901e7bddb 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -77,6 +77,8 @@ C++ Specific Potentially Breaking Changes
 ABI Changes in This Version
 ---------------------------
 
+- Fixed Microsoft name mangling of placeholder return types (``auto`` and ``decltype(auto)``) for MSVC 1920+. This change resolves incompatibilities with code compiled by MSVC 1920+ but will introduce incompatibilities with code compiled by earlier versions of Clang unless such code is built with the compiler option ``-fms-compatibility-version=19.14`` to imitate the MSVC 1914 mangling behavior.
+
 AST Dumping Potentially Breaking Changes
 ----------------------------------------
 
diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index ed8d1cf1b98dd8..a113574675b4c5 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -408,6 +408,8 @@ class MicrosoftCXXNameMangler {
   void mangleSourceName(StringRef Name);
   void mangleNestedName(GlobalDecl GD);
 
+  void mangleAutoReturnType(QualType T, QualifierMangleMode QMM);
+
 private:
   bool isStructorDecl(const NamedDecl *ND) const {
     return ND == Structor || getStructor(ND) == Structor;
@@ -477,6 +479,11 @@ class MicrosoftCXXNameMangler {
                           SourceRange Range);
   void mangleObjCKindOfType(const ObjCObjectType *T, Qualifiers Quals,
                             SourceRange Range);
+
+  void mangleAutoReturnType(const MemberPointerType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const PointerType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const LValueReferenceType *T, Qualifiers Quals);
+  void mangleAutoReturnType(const RValueReferenceType *T, Qualifiers Quals);
 };
 }
 
@@ -2494,6 +2501,57 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T,
   mangleArtificialTagType(TagTypeKind::Struct, ASMangling, {"__clang"});
 }
 
+void MicrosoftCXXNameMangler::mangleAutoReturnType(QualType T,
+                                                   QualifierMangleMode QMM) {
+  assert(getASTContext().getLangOpts().isCompatibleWithMSVC(
+             LangOptions::MSVC2019) &&
+         "Cannot mangle MSVC 2017 auto return types!");
+
+  if (isa<AutoType>(T)) {
+    const auto *AT = T->getContainedAutoType();
+    Qualifiers Quals = T.getLocalQualifiers();
+
+    if (QMM == QMM_Result)
+      Out << '?';
+    if (QMM != QMM_Drop)
+      mangleQualifiers(Quals, false);
+    Out << (AT->isDecltypeAuto() ? "_T" : "_P");
+    return;
+  }
+
+  T = T.getDesugaredType(getASTContext());
+  Qualifiers Quals = T.getLocalQualifiers();
+
+  switch (QMM) {
+  case QMM_Drop:
+  case QMM_Result:
+    break;
+  case QMM_Mangle:
+    mangleQualifiers(Quals, false);
+    break;
+  default:
+    llvm_unreachable("QMM_Escape unexpected");
+  }
+
+  const Type *ty = T.getTypePtr();
+  switch (ty->getTypeClass()) {
+  case Type::MemberPointer:
+    mangleAutoReturnType(cast<MemberPointerType>(ty), Quals);
+    break;
+  case Type::Pointer:
+    mangleAutoReturnType(cast<PointerType>(ty), Quals);
+    break;
+  case Type::LValueReference:
+    mangleAutoReturnType(cast<LValueReferenceType>(ty), Quals);
+    break;
+  case Type::RValueReference:
+    mangleAutoReturnType(cast<RValueReferenceType>(ty), Quals);
+    break;
+  default:
+    llvm_unreachable("Invalid type expected");
+  }
+}
+
 void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range,
                                          QualifierMangleMode QMM) {
   // Don't use the canonical types.  MSVC includes things like 'const' on
@@ -2907,17 +2965,51 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
       // can differ by their calling convention and are typically deduced.  So
       // we make sure that this type gets mangled properly.
       mangleType(ResultType, Range, QMM_Result);
-    } else if (const auto *AT = dyn_cast_or_null<AutoType>(
-                   ResultType->getContainedAutoType())) {
-      Out << '?';
-      mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
-      Out << '?';
+    } else if (IsInLambda) {
+      if (const auto *AT = ResultType->getContainedAutoType()) {
+        assert(AT->getKeyword() == AutoTypeKeyword::Auto &&
+               "should only need to mangle auto!");
+        Out << '?';
+        mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
+        Out << '?';
+        mangleSourceName("<auto>");
+        Out << '@';
+      } else {
+        Out << '@';
+      }
+    } else if (const auto *AT = ResultType->getContainedAutoType()) {
       assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType &&
              "shouldn't need to mangle __auto_type!");
-      mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
-      Out << '@';
-    } else if (IsInLambda) {
-      Out << '@';
+
+      // If we have any pointer types with the clang address space extension
+      // then defer to the custom clang mangling to keep backwards
+      // compatibility. See `mangleType(const PointerType *T, Qualifiers Quals,
+      // SourceRange Range)` for details.
+      auto UseClangMangling = [](QualType ResultType) {
+        QualType T = ResultType;
+        while (const auto *PT = dyn_cast<PointerType>(T.getTypePtr())) {
+          T = T->getPointeeType();
+          if (T.getQualifiers().hasAddressSpace())
+            return true;
+        }
+        return false;
+      };
+
+      if (getASTContext().getLangOpts().isCompatibleWithMSVC(
+              LangOptions::MSVC2019) &&
+          !UseClangMangling(ResultType)) {
+        if (D && !D->getPrimaryTemplate()) {
+          Out << '@';
+        } else {
+          mangleAutoReturnType(ResultType, QMM_Result);
+        }
+      } else {
+        Out << '?';
+        mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
+        Out << '?';
+        mangleSourceName(AT->isDecltypeAuto() ? "<decltype-auto>" : "<auto>");
+        Out << '@';
+      }
     } else {
       if (ResultType->isVoidType())
         ResultType = ResultType.getUnqualifiedType();
@@ -4220,6 +4312,57 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL,
   Mangler.getStream() << '@';
 }
 
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const MemberPointerType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+  if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
+    Out << '8';
+    mangleName(T->getClass()->castAs<RecordType>()->getDecl());
+    mangleFunctionType(FPT, nullptr, true);
+  } else {
+    mangleQualifiers(PointeeType.getQualifiers(), true);
+    mangleName(T->getClass()->castAs<RecordType>()->getDecl());
+    mangleAutoReturnType(PointeeType, QMM_Drop);
+  }
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const PointerType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!PointeeType.getQualifiers().hasAddressSpace() &&
+         "Unexpected address space mangling required");
+
+  manglePointerCVQualifiers(Quals);
+  manglePointerExtQualifiers(Quals, PointeeType);
+
+  if (const FunctionProtoType *FPT = PointeeType->getAs<FunctionProtoType>()) {
+    Out << '6';
+    mangleFunctionType(FPT);
+  } else {
+    mangleAutoReturnType(PointeeType, QMM_Mangle);
+  }
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const LValueReferenceType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
+  Out << 'A';
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleAutoReturnType(PointeeType, QMM_Mangle);
+}
+
+void MicrosoftCXXNameMangler::mangleAutoReturnType(const RValueReferenceType *T,
+                                                   Qualifiers Quals) {
+  QualType PointeeType = T->getPointeeType();
+  assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!");
+  Out << "$$Q";
+  manglePointerExtQualifiers(Quals, PointeeType);
+  mangleAutoReturnType(PointeeType, QMM_Mangle);
+}
+
 MicrosoftMangleContext *MicrosoftMangleContext::create(ASTContext &Context,
                                                        DiagnosticsEngine &Diags,
                                                        bool IsAux) {
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp
new file mode 100644
index 00000000000000..737c9c407f4703
--- /dev/null
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp
@@ -0,0 +1,369 @@
+// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck %s
+
+struct StructA {};
+
+template<class T>
+auto AutoT() { return T(); }
+
+template<class T>
+const auto AutoConstT() { return T(); }
+
+template<class T>
+volatile auto AutoVolatileT() { return T(); }
+
+template<class T>
+const volatile auto AutoConstVolatileT() { return T(); }
+
+// The qualifiers of the return type should always be emitted even for void types.
+// Void types usually have their qualifiers stripped in the mangled name for MSVC ABI.
+void test_template_auto_void() {
+  AutoT<void>();
+  // CHECK: call {{.*}} @"??$AutoT@X@@YA?A_PXZ"
+
+  AutoT<const void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBX@@YA?A_PXZ"
+
+  AutoT<volatile void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CCX@@YA?A_PXZ"
+
+  AutoT<const volatile void>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CDX@@YA?A_PXZ"
+
+  AutoConstT<void>();
+  // CHECK: call {{.*}} @"??$AutoConstT@X@@YA?B_PXZ"
+
+  AutoVolatileT<void>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@X@@YA?C_PXZ"
+
+  AutoConstVolatileT<void>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@X@@YA?D_PXZ"
+}
+
+void test_template_auto_int() {
+  AutoT<int>();
+  // CHECK: call {{.*}} @"??$AutoT@H@@YA?A_PXZ"
+
+  AutoT<const int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBH@@YA?A_PXZ"
+
+  AutoT<volatile int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CCH@@YA?A_PXZ"
+
+  AutoT<const volatile int>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CDH@@YA?A_PXZ"
+
+  AutoConstT<int>();
+  // CHECK: call {{.*}} @"??$AutoConstT@H@@YA?B_PXZ"
+
+  AutoVolatileT<int>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@H@@YA?C_PXZ"
+
+  AutoConstVolatileT<int>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@H@@YA?D_PXZ"
+}
+
+void test_template_auto_struct() {
+  AutoT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoT@UStructA@@@@YA?A_PXZ"
+
+  AutoT<const StructA>();
+  // CHECK: call {{.*}} @"??$AutoT@$$CBUStructA@@@@YA?A_PXZ"
+
+  AutoConstT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoConstT@UStructA@@@@YA?B_PXZ"
+
+  AutoVolatileT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@UStructA@@@@YA?C_PXZ"
+
+  AutoConstVolatileT<StructA>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@UStructA@@@@YA?D_PXZ"
+}
+
+void test_template_auto_ptr() {
+  AutoT<int*>();
+  // CHECK: call {{.*}} @"??$AutoT@PEAH@@YA?A_PXZ"
+
+  AutoT<const int*>();
+  // CHECK: call {{.*}} @"??$AutoT@PEBH@@YA?A_PXZ"
+
+  AutoT<const int* const>();
+  // CHECK: call {{.*}} @"??$AutoT@QEBH@@YA?A_PXZ"
+
+  AutoConstT<int*>();
+  // CHECK: call {{.*}} @"??$AutoConstT@PEAH@@YA?B_PXZ"
+
+  AutoVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$AutoVolatileT@PEAH@@YA?C_PXZ"
+
+  AutoConstVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$AutoConstVolatileT@PEAH@@YA?D_PXZ"
+}
+
+template<class T>
+auto* PtrAutoT() { return T(); }
+
+template<class T>
+const auto* PtrAutoConstT() { return T(); }
+
+template<class T>
+volatile auto* PtrAutoVolatileT() { return T(); }
+
+template<class T>
+const volatile auto* PtrAutoConstVolatileT() { return T(); }
+
+void test_template_ptr_auto() {
+  PtrAutoT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@PEAH@@YAPEA_PXZ"
+
+  PtrAutoT<const int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@PEBH@@YAPEA_PXZ"
+
+  PtrAutoT<const int* const>();
+  // CHECK: call {{.*}} @"??$PtrAutoT@QEBH@@YAPEA_PXZ"
+
+  PtrAutoConstT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoConstT@PEAH@@YAPEB_PXZ"
+
+  PtrAutoVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoVolatileT@PEAH@@YAPEC_PXZ"
+
+  PtrAutoConstVolatileT<int*>();
+  // CHECK: call {{.*}} @"??$PtrAutoConstVolatileT@PEAH@@YAPED_PXZ"
+}
+
+int func_int();
+const int func_constint();
+void func_void();
+int* func_intptr();
+
+template<class T, T v>
+auto (*FuncPtrAutoT())() { return v; }
+
+void test_template_func_ptr_auto() {
+  FuncPtrAutoT<int (*)(), &func_int>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AHXZ$1?func_int@@YAHXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<const int (*)(), &func_constint>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6A?BHXZ$1?func_constint@@YA?BHXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<void (*)(), &func_void>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AXXZ$1?func_void@@YAXXZ@@YAP6A?A_PXZXZ"
+
+  FuncPtrAutoT<int * (*)(), &func_intptr>();
+  // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6APEAHXZ$1?func_intptr@@YAPEAHXZ@@YAP6A?A_PXZXZ"
+}
+
+template<class T>
+auto& RefAutoT(T& x) { return x; }
+
+template<class T>
+const auto& ConstRefAutoT(T& x) { return x; }
+
+template<class T>
+auto&& RRefAutoT(T& x) { return static_cast<int&&>(x); }
+
+void test_template_ref_auto() {
+  int x;
+
+  RefAutoT(x);
+  // CHECK: call {{.*}} @"??$RefAutoT@H@@YAAEA_PAEAH@Z"
+
+  ConstRefAutoT(x);
+  // CHECK: call {{.*}} @"??$ConstRefAutoT@H@@YAAEB_PAEAH@Z"
+
+  RRefAutoT(x);
+  // CHECK: call {{.*}} @"??$RRefAutoT@H@@YA$$QEA_PAEAH@Z"
+}
+
+template<class T>
+decltype(auto) DecltypeAutoT() { return T(); }
+
+template<class T>
+decltype(auto) DecltypeAutoT2(T& x) { return static_cast<T&&>(x); }
+
+void test_template_decltypeauto() {
+  DecltypeAutoT<void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@X@@YA?A_TXZ"
+
+  DecltypeAutoT<const void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBX@@YA?A_TXZ"
+
+  DecltypeAutoT<volatile void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCX@@YA?A_TXZ"
+
+  DecltypeAutoT<const volatile void>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDX@@YA?A_TXZ"
+
+  DecltypeAutoT<int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@H@@YA?A_TXZ"
+
+  DecltypeAutoT<const int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBH@@YA?A_TXZ"
+
+  DecltypeAutoT<volatile int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCH@@YA?A_TXZ"
+
+  DecltypeAutoT<const volatile int>();
+  // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDH@@YA?A_TXZ"
+
+  int x;
+
+  DecltypeAutoT2(x);
+  // CHECK: call {{.*}} @"??$DecltypeAutoT2@H@@YA?A_TAEAH@Z"
+}
+
+// Still want to use clang's custom mangling for lambdas to keep backwards compatibility until
+// MSVC lambda name mangling has been deciphered.
+void test_lambda() {
+  auto lambdaIntRetAuto = []() { return 0; };
+  lambdaIntRetAuto();
+  // CHECK: call {{.*}} @"??R<lambda_1>@?0??test_lambda@@YAXXZ@QEBA?A?<auto>@@XZ"
+
+  auto lambdaIntRet = []() -> int { return 0; };
+  lambdaIntRet();
+  // CHECK: call {{.*}} @"??R<lambda_2>@?0??test_lambda@@YAXXZ@QEBA@XZ"
+
+  auto lambdaGenericIntIntRetAuto = [](auto a) { return a; };
+  lambdaGenericIntIntRetAuto(0);
+  // CHECK: call {{.*}} @"??$?RH@<lambda_0>@?0??test_lambda@@YAXXZ@QEBA?A?<auto>@@H@Z"
+}
+
+auto TestTrailingInt() -> int {
+  return 0;
+}
+
+auto TestTrailingConstVolatileVoid() -> const volatile void {
+}
+
+auto TestTrailingStructA() -> StructA {
+  return StructA{};
+}
+
+void test_trailing_return() {
+  TestTrailingInt();
+  // CHECK: call {{.*}} @"?TestTrailingInt@@YAHXZ"
+
+  TestTrailingConstVolatileVoid();
+  // CHECK: call {{.*}} @"?TestTrailingConstVolatileVoid@@YAXXZ"
+
+  TestTrailingStructA();
+  // CHECK: call {{.*}} @"?TestTrailingStructA@@YA?AUStructA@@XZ"
+}
+
+auto TestNonTemplateAutoInt() {
+  return 0;
+}
+
+auto TestNonTemplateAutoVoid() {
+  return;
+}
+
+auto TestNonTemplateAutoStructA() {
+  return StructA{};
+}
+
+const auto TestNonTemplateConstAutoInt() {
+  return 0;
+}
+
+const auto TestNonTemplateConstAutoVoid() {
+  return;
+}
+
+const auto TestNonTemplateConstAutoStructA() {
+  return StructA{};
+}
+
+void test_nontemplate_auto() {
+  TestNonTemplateAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoInt@@YA@XZ"
+
+  TestNonTemplateAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoVoid@@YA@XZ"
+
+  TestNonTemplateAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateAutoStructA@@YA@XZ"
+
+  TestNonTemplateConstAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoInt@@YA@XZ"
+
+  TestNonTemplateConstAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoVoid@@YA@XZ"
+
+  TestNonTemplateConstAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateConstAutoStructA@@YA@XZ"
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoInt() {
+    return 0;
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoVoid() {
+    return;
+}
+
+decltype(auto) TestNonTemplateDecltypeAutoStructA() {
+    return StructA{};
+}
+
+void test_nontemplate_decltypeauto() {
+  TestNonTemplateDecltypeAutoInt();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoInt@@YA@XZ"
+
+  TestNonTemplateDecltypeAutoVoid();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoVoid@@YA@XZ"
+
+  TestNonTemplateDecltypeAutoStructA();
+  // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoStructA@@YA@XZ"
+}
+
+struct StructB {
+  int x;
+};
+
+template<class T>
+auto StructB::* AutoMemberDataPtrT(T x) { return x; }
+
+template<class T>
+const auto StructB::* AutoConstMemberDataPtrT(T x) { return x; }
+
+void test_template_auto_member_data_ptr() {
+  AutoMemberDataPtrT(&StructB::x);
+  // CHECK: call {{.*}} @"??$AutoMemberDataPtrT@PEQStructB@@H@@YAPEQStructB@@_PPEQ0@H@Z"
+
+  AutoConstMemberDataPtrT(&StructB::x);
+  // CHECK: call {{.*}} @"??$AutoConstMemberDataPtrT@PEQStructB@@H@@YAPERStructB@@_PPEQ0@H@Z"
+}
+
+struct StructC {
+  void test() {}
+};
+
+struct StructD {
+  const int test() { return 0; }
+};
+
+template<class T>
+auto (StructC::*AutoMemberFuncPtrT(T x))() { return x; }
+
+template<class T>
+const auto (StructD::*AutoConstMemberFuncPtrT(T x))() { return x; }
+
+void test_template_auto_member_func_ptr() {
+  AutoMemberFuncPtrT(&StructC::test);
+  // CHECK: call {{.*}} @"??$AutoMemberFuncPtrT@P8StructC@@EAAXXZ@@YAP8StructC@@EAA?A_PXZP80@EAAXXZ@Z"
+
+  AutoConstMemberFuncPtrT(&StructD::test);
+  // CHECK: call {{.*}} @"??$AutoConstMemberFuncPtrT@P8StructD@@EAA?BHXZ@@YAP8StructD@@EAA?B_PXZP80@EAA?BHXZ@Z"
+}
+
+template<class T>
+auto * __attribute__((address_space(1))) * AutoPtrAddressSpaceT() {
+  T * __attribute__((address_space(1))) * p = nullptr;
+  return p;
+}
+
+void test_template_auto_address_space_ptr() {
+  AutoPtrAddressSpaceT<int>();
+  // CHECK: call {{.*}} @"??$AutoPtrAddressSpaceT@H@@YA?A?<auto>@@XZ"
+}
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
index 360ebdecc5562b..b7bc3953f0b438 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp
@@ -34,15 +34,15 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$I?f@V@@QEAAXXZA@A@@@QEAA@XZ"
 
   AutoFunc<&S::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?f@S@@QEAAXXZ@@YA?A?<auto>@@XZ"
 
   AutoFunc<&M::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$H?f@M@@QEAAXXZA@@@YA?A?<auto>@@XZ"
 
   AutoFunc<&V::f>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$I?f@V@@QEAAXXZA@A@@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&S::a> auto_data_single_inheritance;
@@ -58,14 +58,14 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$FBA@A@@@QEAA@XZ"
 
   AutoFunc<&S::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$07@@YA?A?<auto>@@XZ"
 
   AutoFunc<&M::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$0M@@@YA?A?<auto>@@XZ"
 
   AutoFunc<&V::a>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$FBA@A@@@YA?A?<auto>@@XZ"
 }
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
index 8f98c1e59f73d7..251d9219c01ce2 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp
@@ -19,6 +19,6 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ"
 
   AutoFunc<nullptr>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$0A@@@YA?A?<auto>@@XZ"
 }
diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
index ff5395cea75eb7..effcc31ee31103 100644
--- a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
+++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp
@@ -26,7 +26,7 @@ int j;
 
 void template_mangling() {
   AutoFunc<1>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?<auto>@@XZ"
   AutoParmTemplate<0> auto_int;
   // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ"
@@ -52,7 +52,7 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ"
 
   AutoFunc<&i>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?i@@3HA@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&i> auto_int_ptr;
@@ -64,7 +64,7 @@ void template_mangling() {
   // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$1?i@@3HA$1?j@@3HA@@QEAA@XZ"
 
   AutoFunc<&Func>();
-  // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A?<auto>@@XZ"
+  // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A_PXZ"
   // BEFORE: call {{.*}} @"??$AutoFunc@$1?Func@@YAHXZ@@YA?A?<auto>@@XZ"
 
   AutoParmTemplate<&Func> auto_func_ptr;

From 6e2d9df02502e16659e4a9397260baf9df224f17 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 14 Aug 2024 22:14:29 -0700
Subject: [PATCH 24/47] Fix testcases. Use -emit-llvm and not -S. Use LABEL
 checking.

---
 clang/test/CodeGen/overflow-idiom-exclusion-fp.c |  4 ++--
 clang/test/CodeGen/overflow-idiom-exclusion.c    | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
index 1d26caa5b4f54b..511a88cc7a2836 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s
 
 // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns
 extern unsigned a, b, c;
diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c
index 02dd3ef3ae42da..c4756a0b80f61e 100644
--- a/clang/test/CodeGen/overflow-idiom-exclusion.c
+++ b/clang/test/CodeGen/overflow-idiom-exclusion.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -o - %s | FileCheck %s --check-prefix=ADD
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -o - %s | FileCheck %s --check-prefix=NEGATE
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -o - %s | FileCheck %s --check-prefix=WHILE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE
 
 // Ensure some common overflow-dependent or overflow-prone code patterns don't
 // trigger the overflow sanitizers. In many cases, overflow warnings caused by
@@ -32,6 +32,7 @@
 // NEGATE: usub.with.overflow
 // NEGATE-NOT: negate_overflow
 
+// WHILE-LABEL: @basic_commutativity
 // WHILE: handler.add_overflow
 // WHILE: negate_overflow
 // WHILE-NOT: usub.with.overflow
@@ -57,6 +58,7 @@ void basic_commutativity(void) {
     c = 9;
 }
 
+// WHILE-LABEL: @arguments_and_commutativity
 void arguments_and_commutativity(unsigned V1, unsigned V2) {
   if (V1 + V2 < V1)
     c = 9;

From 94b8b11ac305ebe730e6b70b2463811de395cb40 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Thu, 15 Aug 2024 05:27:19 +0000
Subject: [PATCH 25/47] [Clang][NFC] Move FindCountedByField into FieldDecl
 (#104235)

FindCountedByField can be used in more places than CodeGen. Move it into
FieldDecl to avoid layering issues.
---
 clang/include/clang/AST/Decl.h      |  4 ++++
 clang/lib/AST/Decl.cpp              | 13 +++++++++++++
 clang/lib/CodeGen/CGBuiltin.cpp     |  2 +-
 clang/lib/CodeGen/CGExpr.cpp        | 18 +-----------------
 clang/lib/CodeGen/CodeGenFunction.h |  4 ----
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h
index 561a9d872acfb0..6d84bd03de810a 100644
--- a/clang/include/clang/AST/Decl.h
+++ b/clang/include/clang/AST/Decl.h
@@ -3206,6 +3206,10 @@ class FieldDecl : public DeclaratorDecl, public Mergeable<FieldDecl> {
   /// Set the C++11 in-class initializer for this member.
   void setInClassInitializer(Expr *NewInit);
 
+  /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
+  /// \p nullptr if either the attribute or the field doesn't exist.
+  const FieldDecl *findCountedByField() const;
+
 private:
   void setLazyInClassInitializer(LazyDeclStmtPtr NewInit);
 
diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index e125143bc1b270..90caf81757ac96 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -4678,6 +4678,19 @@ void FieldDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const {
   DeclaratorDecl::printName(OS, Policy);
 }
 
+const FieldDecl *FieldDecl::findCountedByField() const {
+  const auto *CAT = getType()->getAs<CountAttributedType>();
+  if (!CAT)
+    return nullptr;
+
+  const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
+  const auto *CountDecl = CountDRE->getDecl();
+  if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
+    CountDecl = IFD->getAnonField();
+
+  return dyn_cast<FieldDecl>(CountDecl);
+}
+
 //===----------------------------------------------------------------------===//
 // TagDecl Implementation
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b5e5240e55be3f..1c0baeaee03632 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -987,7 +987,7 @@ CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type,
     // attribute.
     return nullptr;
 
-  const FieldDecl *CountedByFD = FindCountedByField(FAMDecl);
+  const FieldDecl *CountedByFD = FAMDecl->findCountedByField();
   if (!CountedByFD)
     // Can't find the field referenced by the "counted_by" attribute.
     return nullptr;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f93f8dda0bd29a..0672861790633b 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -1150,22 +1150,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField(
                                    getIntAlign(), "..counted_by.load");
 }
 
-const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) {
-  if (!FD)
-    return nullptr;
-
-  const auto *CAT = FD->getType()->getAs<CountAttributedType>();
-  if (!CAT)
-    return nullptr;
-
-  const auto *CountDRE = cast<DeclRefExpr>(CAT->getCountExpr());
-  const auto *CountDecl = CountDRE->getDecl();
-  if (const auto *IFD = dyn_cast<IndirectFieldDecl>(CountDecl))
-    CountDecl = IFD->getAnonField();
-
-  return dyn_cast<FieldDecl>(CountDecl);
-}
-
 void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base,
                                       llvm::Value *Index, QualType IndexType,
                                       bool Accessed) {
@@ -4305,7 +4289,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E,
           ME->isFlexibleArrayMemberLike(getContext(), StrictFlexArraysLevel) &&
           ME->getMemberDecl()->getType()->isCountAttributedType()) {
         const FieldDecl *FAMDecl = dyn_cast<FieldDecl>(ME->getMemberDecl());
-        if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) {
+        if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) {
           if (std::optional<int64_t> Diff =
                   getOffsetDifferenceInBits(*this, CountFD, FAMDecl)) {
             CharUnits OffsetDiff = CGM.getContext().toCharUnitsFromBits(*Diff);
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 19a7feeb69d820..57e0b7f91e9bf8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3305,10 +3305,6 @@ class CodeGenFunction : public CodeGenTypeCache {
                                         const FieldDecl *FAMDecl,
                                         uint64_t &Offset);
 
-  /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns
-  /// \p nullptr if either the attribute or the field doesn't exist.
-  const FieldDecl *FindCountedByField(const FieldDecl *FD);
-
   /// Build an expression accessing the "counted_by" field.
   llvm::Value *EmitLoadOfCountedByField(const Expr *Base,
                                         const FieldDecl *FAMDecl,

From 07a8cbaf8dc16bebf6e875173d20299d9cc47cc5 Mon Sep 17 00:00:00 2001
From: Bill Wendling <morbo@google.com>
Date: Wed, 14 Aug 2024 22:51:08 -0700
Subject: [PATCH 26/47] Remove failing test until it can be fixed properly.

---
 clang/test/CodeGen/overflow-idiom-exclusion.c | 153 ------------------
 1 file changed, 153 deletions(-)
 delete mode 100644 clang/test/CodeGen/overflow-idiom-exclusion.c

diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c
deleted file mode 100644
index c4756a0b80f61e..00000000000000
--- a/clang/test/CodeGen/overflow-idiom-exclusion.c
+++ /dev/null
@@ -1,153 +0,0 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE
-
-// Ensure some common overflow-dependent or overflow-prone code patterns don't
-// trigger the overflow sanitizers. In many cases, overflow warnings caused by
-// these patterns are seen as "noise" and result in users turning off
-// sanitization all together.
-
-// A pattern like "if (a + b < a)" simply checks for overflow and usually means
-// the user is trying to handle it gracefully.
-
-// Similarly, a pattern resembling "while (i--)" is extremely common and
-// warning on its inevitable overflow can be seen as superfluous. Do note that
-// using "i" in future calculations can be tricky because it will still
-// wrap-around.
-
-// Another common pattern that, in some cases, is found to be too noisy is
-// unsigned negation, for example:
-// unsigned long A = -1UL;
-
-
-// CHECK-NOT: handle{{.*}}overflow
-
-// ADD: usub.with.overflow
-// ADD: negate_overflow
-// ADD-NOT: handler.add_overflow
-
-// NEGATE: handler.add_overflow
-// NEGATE: usub.with.overflow
-// NEGATE-NOT: negate_overflow
-
-// WHILE-LABEL: @basic_commutativity
-// WHILE: handler.add_overflow
-// WHILE: negate_overflow
-// WHILE-NOT: usub.with.overflow
-extern unsigned a, b, c;
-extern unsigned some(void);
-
-void basic_commutativity(void) {
-  if (a + b < a)
-    c = 9;
-  if (a + b < b)
-    c = 9;
-  if (b + a < b)
-    c = 9;
-  if (b + a < a)
-    c = 9;
-  if (a > a + b)
-    c = 9;
-  if (a > b + a)
-    c = 9;
-  if (b > a + b)
-    c = 9;
-  if (b > b + a)
-    c = 9;
-}
-
-// WHILE-LABEL: @arguments_and_commutativity
-void arguments_and_commutativity(unsigned V1, unsigned V2) {
-  if (V1 + V2 < V1)
-    c = 9;
-  if (V1 + V2 < V2)
-    c = 9;
-  if (V2 + V1 < V2)
-    c = 9;
-  if (V2 + V1 < V1)
-    c = 9;
-  if (V1 > V1 + V2)
-    c = 9;
-  if (V1 > V2 + V1)
-    c = 9;
-  if (V2 > V1 + V2)
-    c = 9;
-  if (V2 > V2 + V1)
-    c = 9;
-}
-
-void pointers(unsigned *P1, unsigned *P2, unsigned V1) {
-  if (*P1 + *P2 < *P1)
-    c = 9;
-  if (*P1 + V1 < V1)
-    c = 9;
-  if (V1 + *P2 < *P2)
-    c = 9;
-}
-
-struct OtherStruct {
-  unsigned foo, bar;
-};
-
-struct MyStruct {
-  unsigned base, offset;
-  struct OtherStruct os;
-};
-
-extern struct MyStruct ms;
-
-void structs(void) {
-  if (ms.base + ms.offset < ms.base)
-    c = 9;
-}
-
-void nestedstructs(void) {
-  if (ms.os.foo + ms.os.bar < ms.os.foo)
-    c = 9;
-}
-
-// Normally, this would be folded into a simple call to the overflow handler
-// and a store. Excluding this pattern results in just a store.
-void constants(void) {
-  unsigned base = 4294967295;
-  unsigned offset = 1;
-  if (base + offset < base)
-    c = 9;
-}
-
-void common_while(unsigned i) {
-  // This post-decrement usually causes overflow sanitizers to trip on the very
-  // last operation.
-  while (i--) {
-    some();
-  }
-}
-
-// Normally, these assignments would trip the unsigned overflow sanitizer.
-void negation(void) {
-#define SOME -1UL
-  unsigned long A = -1UL;
-  unsigned long B = -2UL;
-  unsigned long C = -3UL;
-  unsigned long D = -SOME;
-  (void)A;(void)B;(void)C;(void)D;
-}
-
-// cvise'd kernel code that caused problems during development due to sign
-// extension
-typedef unsigned long _size_t;
-int qnbytes;
-int *key_alloc_key;
-_size_t key_alloc_quotalen;
-int *key_alloc(void) {
-  if (qnbytes + key_alloc_quotalen < qnbytes)
-    return key_alloc_key;
-  return key_alloc_key + 3;;
-}
-
-void function_call(void) {
-  if (b + some() < b)
-    c = 9;
-}

From fb9e685fc41b8abc87725f8509624b3a80330dee Mon Sep 17 00:00:00 2001
From: YunQiang Su <syq@debian.org>
Date: Thu, 15 Aug 2024 14:09:36 +0800
Subject: [PATCH 27/47] Intrinsic: introduce minimumnum and maximumnum for IR
 and SelectionDAG (#96649)

C23 introduced new functions fminimum_num and fmaximum_num, and they
follow the minimumNumber and maximumNumber of IEEE754-2019. Let's
introduce new intrinsics to support them.

This patch introduces support only for scalar values. The
support of
  vector (vp, vp.reduce, vector.reduce),
  experimental.constrained
will be added in future patches.

With this patch, MIPSr6 and LoongArch can work out of box with
fcanonical and fmax/fmin.

AArch64/PowerPC64 can use the same logic as MIPSr6 and LoongArch, while
they have no fcanonical support yet.
I will add it in future patches.

The FMIN/FMAX of RISC-V instructions follows the
minimumNumber/maximumNumber of IEEE754-2019. We can just add it in
future patch.

Background

https://discourse.llvm.org/t/rfc-fix-llvm-min-f-and-llvm-max-f-intrinsics/79735
Currently we have fminnum/fmaxnum, which have different behavior on
different platform for NUM vs sNaN:
   1) Fallback to fmin(3)/fmax(3): return qNaN.
   2) ARM64/ARM32+Neon: same as libc.
   3) MIPSr6/LoongArch/RISC-V: return NUM.

The fix for fminnum/fmaxnum to follow minNum/maxNum of IEEE754-2008
will be submitted as separate patches.
---
 llvm/docs/LangRef.rst                         | 182 ++++++++
 .../llvm/Analysis/TargetLibraryInfo.def       |  33 ++
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |   6 +
 llvm/include/llvm/CodeGen/ISDOpcodes.h        |   5 +
 llvm/include/llvm/CodeGen/TargetLowering.h    |   5 +
 llvm/include/llvm/IR/IRBuilder.h              |  12 +
 llvm/include/llvm/IR/IntrinsicInst.h          |   2 +
 llvm/include/llvm/IR/Intrinsics.td            |   8 +
 llvm/include/llvm/IR/RuntimeLibcalls.def      |  10 +
 .../include/llvm/Target/TargetSelectionDAG.td |   4 +
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   5 +-
 llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp |  17 +
 .../SelectionDAG/LegalizeFloatTypes.cpp       |  42 ++
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |   4 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   8 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  24 +
 .../SelectionDAG/SelectionDAGDumper.cpp       |   2 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  88 ++++
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |   1 +
 .../LoongArch/fp-maximumnum-minimumnum.ll     | 431 ++++++++++++++++++
 .../CodeGen/Mips/fp-maximumnum-minimumnum.ll  | 132 ++++++
 .../tools/llvm-tli-checker/ps4-tli-check.yaml |  16 +-
 .../Analysis/TargetLibraryInfoTest.cpp        |   6 +
 23 files changed, 1036 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll
 create mode 100644 llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 0ee4d7b444cfcf..5e5e9b9e8a93b1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -16131,6 +16131,96 @@ The returned value is completely identical to the input except for the sign bit;
 in particular, if the input is a NaN, then the quiet/signaling bit and payload
 are perfectly preserved.
 
+.. _i_fminmax_family:
+
+'``llvm.min.*``' Intrinsics Comparison
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Standard:
+"""""""""
+
+IEEE754 and ISO C define some min/max operations, and they have some differences
+on working with qNaN/sNaN and +0.0/-0.0. Here is the list:
+
+.. list-table::
+   :header-rows: 2
+
+   * - ``ISO C``
+     - fmin/fmax
+     - fminimum/fmaximum
+     - fminimum_num/fmaximum_num
+
+   * - ``IEEE754``
+     - minNum/maxNum (2008)
+     - minimum/maximum (2019)
+     - minimumNumber/maximumNumber (2019)
+
+   * - ``+0.0 vs -0.0``
+     - either one
+     - +0.0 > -0.0
+     - +0.0 > -0.0
+
+   * - ``NUM vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - NUM, invalid exception
+
+   * - ``qNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``NUM vs qNaN``
+     - NUM, no exception
+     - qNaN, no exception
+     - NUM, no exception
+
+LLVM Implementation:
+""""""""""""""""""""
+
+LLVM implements all ISO C flavors as listed in this table, except in the
+default floating-point environment exceptions are ignored. The constrained
+versions of the intrinsics respect the exception behavior.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 16 28 28 28
+
+   * - Operation
+     - minnum/maxnum
+     - minimum/maximum
+     - minimumnum/maximumnum
+
+   * - ``NUM vs qNaN``
+     - NUM, no exception
+     - qNaN, no exception
+     - NUM, no exception
+
+   * - ``NUM vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - NUM, invalid exception
+
+   * - ``qNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``sNaN vs sNaN``
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+     - qNaN, invalid exception
+
+   * - ``+0.0 vs -0.0``
+     - either one
+     - +0.0(max)/-0.0(min)
+     - +0.0(max)/-0.0(min)
+
+   * - ``NUM vs NUM``
+     - larger(max)/smaller(min)
+     - larger(max)/smaller(min)
+     - larger(max)/smaller(min)
+
 .. _i_minnum:
 
 '``llvm.minnum.*``' Intrinsic
@@ -16312,6 +16402,98 @@ of the two arguments. -0.0 is considered to be less than +0.0 for this
 intrinsic. Note that these are the semantics specified in the draft of
 IEEE 754-2019.
 
+.. _i_minimumnum:
+
+'``llvm.minimumnum.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.minimumnum`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.minimumnum.f32(float %Val0, float %Val1)
+      declare double    @llvm.minimumnum.f64(double %Val0, double %Val1)
+      declare x86_fp80  @llvm.minimumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+      declare fp128     @llvm.minimumnum.f128(fp128 %Val0, fp128 %Val1)
+      declare ppc_fp128 @llvm.minimumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+Overview:
+"""""""""
+
+The '``llvm.minimumnum.*``' intrinsics return the minimum of the two
+arguments, not propagating NaNs and treating -0.0 as less than +0.0.
+
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating-point numbers of the same
+type.
+
+Semantics:
+""""""""""
+If both operands are NaNs (including sNaN), returns qNaN. If one operand
+is NaN (including sNaN) and another operand is a number, return the number.
+Otherwise returns the lesser of the two arguments. -0.0 is considered to
+be less than +0.0 for this intrinsic.
+
+Note that these are the semantics of minimumNumber specified in IEEE 754-2019.
+
+It has some differences with '``llvm.minnum.*``':
+1) '``llvm.minnum.*``' will return qNaN if either operand is sNaN.
+2) '``llvm.minnum.*``' may return either one if we compare +0.0 vs -0.0.
+
+.. _i_maximumnum:
+
+'``llvm.maximumnum.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.maximumnum`` on any
+floating-point or vector of floating-point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.maximumnum.f32(float %Val0, float %Val1)
+      declare double    @llvm.maximumnum.f64(double %Val0, double %Val1)
+      declare x86_fp80  @llvm.maximumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1)
+      declare fp128     @llvm.maximumnum.f128(fp128 %Val0, fp128 %Val1)
+      declare ppc_fp128 @llvm.maximumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1)
+
+Overview:
+"""""""""
+
+The '``llvm.maximumnum.*``' intrinsics return the maximum of the two
+arguments, not propagating NaNs and treating -0.0 as less than +0.0.
+
+
+Arguments:
+""""""""""
+
+The arguments and return value are floating-point numbers of the same
+type.
+
+Semantics:
+""""""""""
+If both operands are NaNs (including sNaN), returns qNaN. If one operand
+is NaN (including sNaN) and another operand is a number, return the number.
+Otherwise returns the greater of the two arguments. -0.0 is considered to
+be less than +0.0 for this intrinsic.
+
+Note that these are the semantics of maximumNumber specified in IEEE 754-2019.
+
+It has some differences with '``llvm.maxnum.*``':
+1) '``llvm.maxnum.*``' will return qNaN if either operand is sNaN.
+2) '``llvm.maxnum.*``' may return either one if we compare +0.0 vs -0.0.
+
 .. _int_copysign:
 
 '``llvm.copysign.*``' Intrinsic
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 7be5bb04549c61..e1cb1e5c557eae 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -1388,6 +1388,39 @@ TLI_DEFINE_ENUM_INTERNAL(fminl)
 TLI_DEFINE_STRING_INTERNAL("fminl")
 TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
 
+// Calls to fmaximum_num and fminimum_num library functions expand to the llvm.maximumnum and
+// llvm.minimumnum intrinsics with the correct parameter types for the arguments
+// (all types must match).
+/// double fmaximum_num(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_num)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_num")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// float fmaximum_numf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_numf)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_numf")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// long double fmaximum_numl(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fmaximum_numl)
+TLI_DEFINE_STRING_INTERNAL("fmaximum_numl")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// double fminimum_num(double x, double y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_num)
+TLI_DEFINE_STRING_INTERNAL("fminimum_num")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// float fminimum_numf(float x, float y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_numf)
+TLI_DEFINE_STRING_INTERNAL("fminimum_numf")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
+/// long double fminimum_numl(long double x, long double y);
+TLI_DEFINE_ENUM_INTERNAL(fminimum_numl)
+TLI_DEFINE_STRING_INTERNAL("fminimum_numl")
+TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same)
+
 /// double fmod(double x, double y);
 TLI_DEFINE_ENUM_INTERNAL(fmod)
 TLI_DEFINE_STRING_INTERNAL("fmod")
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 279cfb5aa47d6f..77ddc10e8a0e76 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -2037,6 +2037,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     case Intrinsic::maximum:
       ISD = ISD::FMAXIMUM;
       break;
+    case Intrinsic::minimumnum:
+      ISD = ISD::FMINIMUMNUM;
+      break;
+    case Intrinsic::maximumnum:
+      ISD = ISD::FMAXIMUMNUM;
+      break;
     case Intrinsic::copysign:
       ISD = ISD::FCOPYSIGN;
       break;
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 7305e3086fcd65..b8f8818a749528 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1047,6 +1047,11 @@ enum NodeType {
   FMINIMUM,
   FMAXIMUM,
 
+  /// FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that behave the same as
+  /// FMINNUM_IEEE and FMAXNUM_IEEE except when either operand is an sNaN.
+  FMINIMUMNUM,
+  FMAXIMUMNUM,
+
   /// FSINCOS - Compute both fsin and fcos as a single operation.
   FSINCOS,
 
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index deb1d04df3400c..eda38cd8a564d6 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2908,6 +2908,8 @@ class TargetLoweringBase {
     case ISD::FMAXNUM_IEEE:
     case ISD::FMINIMUM:
     case ISD::FMAXIMUM:
+    case ISD::FMINIMUMNUM:
+    case ISD::FMAXIMUMNUM:
     case ISD::AVGFLOORS:
     case ISD::AVGFLOORU:
     case ISD::AVGCEILS:
@@ -5283,6 +5285,9 @@ class TargetLowering : public TargetLoweringBase {
   /// Expand fminimum/fmaximum into multiple comparison with selects.
   SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const;
 
+  /// Expand fminimumnum/fmaximumnum into multiple comparison with selects.
+  SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const;
+
   /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
   /// \param N Node to expand
   /// \returns The expansion result
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 3f3d75012c6945..0dbcbc0b2cb76f 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1015,6 +1015,18 @@ class IRBuilderBase {
     return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name);
   }
 
+  /// Create call to the minimumnum intrinsic.
+  Value *CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateBinaryIntrinsic(Intrinsic::minimumnum, LHS, RHS, nullptr,
+                                 Name);
+  }
+
+  /// Create call to the maximumnum intrinsic.
+  Value *CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateBinaryIntrinsic(Intrinsic::maximumnum, LHS, RHS, nullptr,
+                                 Name);
+  }
+
   /// Create call to the copysign intrinsic.
   Value *CreateCopySign(Value *LHS, Value *RHS,
                         Instruction *FMFSource = nullptr,
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 94c8fa092f45e6..2f1e2c08c3ecec 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -76,6 +76,8 @@ class IntrinsicInst : public CallInst {
     case Intrinsic::minnum:
     case Intrinsic::maximum:
     case Intrinsic::minimum:
+    case Intrinsic::maximumnum:
+    case Intrinsic::minimumnum:
     case Intrinsic::smax:
     case Intrinsic::smin:
     case Intrinsic::umax:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b4e758136b39fb..0841273fd2e1e5 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1085,6 +1085,14 @@ def int_maximum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
   [LLVMMatchType<0>, LLVMMatchType<0>],
   [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
 >;
+def int_minimumnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
+def int_maximumnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
+  [LLVMMatchType<0>, LLVMMatchType<0>],
+  [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative]
+>;
 
 // Internal interface for object size checking
 def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty],
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 89aaf6d1ad83f8..c3d5ef9f4e4f82 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -299,6 +299,16 @@ HANDLE_LIBCALL(FMAX_F64, "fmax")
 HANDLE_LIBCALL(FMAX_F80, "fmaxl")
 HANDLE_LIBCALL(FMAX_F128, "fmaxl")
 HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl")
+HANDLE_LIBCALL(FMINIMUMNUM_F32, "fminimum_numf")
+HANDLE_LIBCALL(FMINIMUMNUM_F64, "fminimum_num")
+HANDLE_LIBCALL(FMINIMUMNUM_F80, "fminimum_numl")
+HANDLE_LIBCALL(FMINIMUMNUM_F128, "fminimum_numl")
+HANDLE_LIBCALL(FMINIMUMNUM_PPCF128, "fminimum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_F32, "fmaximum_numf")
+HANDLE_LIBCALL(FMAXIMUMNUM_F64, "fmaximum_num")
+HANDLE_LIBCALL(FMAXIMUMNUM_F80, "fmaximum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_F128, "fmaximum_numl")
+HANDLE_LIBCALL(FMAXIMUMNUM_PPCF128, "fmaximum_numl")
 HANDLE_LIBCALL(LROUND_F32, "lroundf")
 HANDLE_LIBCALL(LROUND_F64, "lround")
 HANDLE_LIBCALL(LROUND_F80, "lroundl")
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index e40ad2062166ea..172deffbd31771 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -517,6 +517,10 @@ def fminimum   : SDNode<"ISD::FMINIMUM"   , SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
 def fmaximum   : SDNode<"ISD::FMAXIMUM"   , SDTFPBinOp,
                         [SDNPCommutative, SDNPAssociative]>;
+def fminimumnum   : SDNode<"ISD::FMINIMUMNUM"   , SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def fmaximumnum   : SDNode<"ISD::FMAXIMUMNUM"   , SDTFPBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
 def fgetsign   : SDNode<"ISD::FGETSIGN"   , SDTFPToIntOp>;
 def fcanonicalize : SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>;
 def fneg       : SDNode<"ISD::FNEG"       , SDTFPUnaryOp>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0aa8b82f533f2a..25644c24855a62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1939,7 +1939,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::FMINNUM:
   case ISD::FMAXNUM:
   case ISD::FMINIMUM:
-  case ISD::FMAXIMUM:           return visitFMinMax(N);
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:       return visitFMinMax(N);
   case ISD::FCEIL:              return visitFCEIL(N);
   case ISD::FTRUNC:             return visitFTRUNC(N);
   case ISD::FFREXP:             return visitFFREXP(N);
@@ -6068,6 +6070,7 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
   return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
 }
 
+// FIXME: use FMINIMUMNUM if possible, such as for RISC-V.
 static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
                                      ISD::CondCode CC, unsigned OrAndOpcode,
                                      SelectionDAG &DAG,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 3eadfbf51ddaa1..e7f765382b0e46 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3660,6 +3660,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       Results.push_back(Expanded);
     break;
   }
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM: {
+    Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
+    break;
+  }
   case ISD::FSIN:
   case ISD::FCOS: {
     EVT VT = Node->getValueType(0);
@@ -4539,6 +4544,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::FMAX_F80, RTLIB::FMAX_F128,
                     RTLIB::FMAX_PPCF128, Results);
     break;
+  case ISD::FMINIMUMNUM:
+    ExpandFPLibCall(Node, RTLIB::FMINIMUMNUM_F32, RTLIB::FMINIMUMNUM_F64,
+                    RTLIB::FMINIMUMNUM_F80, RTLIB::FMINIMUMNUM_F128,
+                    RTLIB::FMINIMUMNUM_PPCF128, Results);
+    break;
+  case ISD::FMAXIMUMNUM:
+    ExpandFPLibCall(Node, RTLIB::FMAXIMUMNUM_F32, RTLIB::FMAXIMUMNUM_F64,
+                    RTLIB::FMAXIMUMNUM_F80, RTLIB::FMAXIMUMNUM_F128,
+                    RTLIB::FMAXIMUMNUM_PPCF128, Results);
+    break;
   case ISD::FSQRT:
   case ISD::STRICT_FSQRT:
     ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
@@ -5464,6 +5479,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::FMAXNUM:
   case ISD::FMINIMUM:
   case ISD::FMAXIMUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM:
   case ISD::FPOW:
     Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
     Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 0c881d81a2c639..ad0c054d3ccd50 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -74,6 +74,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::STRICT_FMAXNUM:
     case ISD::FMAXNUM:     R = SoftenFloatRes_FMAXNUM(N); break;
+    case ISD::FMINIMUMNUM:    R = SoftenFloatRes_FMINIMUMNUM(N); break;
+    case ISD::FMAXIMUMNUM:    R = SoftenFloatRes_FMAXIMUMNUM(N); break;
     case ISD::STRICT_FADD:
     case ISD::FADD:        R = SoftenFloatRes_FADD(N); break;
     case ISD::STRICT_FACOS:
@@ -323,6 +325,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
                                                RTLIB::FMAX_PPCF128));
 }
 
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMINIMUMNUM(SDNode *N) {
+  return SoftenFloatRes_Binary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32,
+                      RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80,
+                      RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXIMUMNUM(SDNode *N) {
+  return SoftenFloatRes_Binary(
+      N, GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32,
+                      RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80,
+                      RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
   return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
                                                RTLIB::ADD_F32,
@@ -1404,6 +1420,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
   case ISD::FMINNUM:    ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
   case ISD::STRICT_FMAXNUM:
   case ISD::FMAXNUM:    ExpandFloatRes_FMAXNUM(N, Lo, Hi); break;
+  case ISD::FMINIMUMNUM: ExpandFloatRes_FMINIMUMNUM(N, Lo, Hi); break;
+  case ISD::FMAXIMUMNUM: ExpandFloatRes_FMAXIMUMNUM(N, Lo, Hi); break;
   case ISD::STRICT_FADD:
   case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
   case ISD::STRICT_FACOS:
@@ -1558,6 +1576,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo,
                                         RTLIB::FMAX_PPCF128), Lo, Hi);
 }
 
+void DAGTypeLegalizer::ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  ExpandFloatRes_Binary(
+      N,
+      GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32,
+                   RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80,
+                   RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128),
+      Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  ExpandFloatRes_Binary(
+      N,
+      GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32,
+                   RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80,
+                   RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128),
+      Lo, Hi);
+}
+
 void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
   ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
@@ -2621,6 +2659,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
     case ISD::FDIV:
     case ISD::FMAXIMUM:
     case ISD::FMINIMUM:
+    case ISD::FMAXIMUMNUM:
+    case ISD::FMINIMUMNUM:
     case ISD::FMAXNUM:
     case ISD::FMINNUM:
     case ISD::FMAXNUM_IEEE:
@@ -3063,6 +3103,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
   case ISD::FDIV:
   case ISD::FMAXIMUM:
   case ISD::FMINIMUM:
+  case ISD::FMAXIMUMNUM:
+  case ISD::FMINIMUMNUM:
   case ISD::FMAXNUM:
   case ISD::FMINNUM:
   case ISD::FMUL:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3a49a8ff10860a..6de1e3eca7feda 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -567,6 +567,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SoftenFloatRes_FATAN(SDNode *N);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
+  SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N);
+  SDValue SoftenFloatRes_FMAXIMUMNUM(SDNode *N);
   SDValue SoftenFloatRes_FADD(SDNode *N);
   SDValue SoftenFloatRes_FCBRT(SDNode *N);
   SDValue SoftenFloatRes_FCEIL(SDNode *N);
@@ -659,6 +661,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void ExpandFloatRes_FATAN     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMINNUM   (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FMAXNUM   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FADD      (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCBRT     (SDNode *N, SDValue &Lo, SDValue &Hi);
   void ExpandFloatRes_FCEIL     (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ab12c3b0e728a8..7bf90ceb93cb4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5465,7 +5465,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
     return false;
   }
   case ISD::FMINNUM:
-  case ISD::FMAXNUM: {
+  case ISD::FMAXNUM:
+  case ISD::FMINIMUMNUM:
+  case ISD::FMAXIMUMNUM: {
     // Only one needs to be known not-nan, since it will be returned if the
     // other ends up being one.
     return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) ||
@@ -6804,6 +6806,10 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
       return getConstantFP(minimum(C1, C2), DL, VT);
     case ISD::FMAXIMUM:
       return getConstantFP(maximum(C1, C2), DL, VT);
+    case ISD::FMINIMUMNUM:
+      return getConstantFP(minimumnum(C1, C2), DL, VT);
+    case ISD::FMAXIMUMNUM:
+      return getConstantFP(maximumnum(C1, C2), DL, VT);
     default: break;
     }
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 37ba62911ec70b..7cdd3d47b641d7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6882,6 +6882,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                              getValue(I.getArgOperand(0)),
                              getValue(I.getArgOperand(1)), Flags));
     return;
+  case Intrinsic::minimumnum:
+    setValue(&I, DAG.getNode(ISD::FMINIMUMNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)), Flags));
+    return;
+  case Intrinsic::maximumnum:
+    setValue(&I, DAG.getNode(ISD::FMAXIMUMNUM, sdl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0)),
+                             getValue(I.getArgOperand(1)), Flags));
+    return;
   case Intrinsic::copysign:
     setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
                              getValue(I.getArgOperand(0)).getValueType(),
@@ -9257,6 +9269,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
         if (visitBinaryFloatCall(I, ISD::FMAXNUM))
           return;
         break;
+      case LibFunc_fminimum_num:
+      case LibFunc_fminimum_numf:
+      case LibFunc_fminimum_numl:
+        if (visitBinaryFloatCall(I, ISD::FMINIMUMNUM))
+          return;
+        break;
+      case LibFunc_fmaximum_num:
+      case LibFunc_fmaximum_numf:
+      case LibFunc_fmaximum_numl:
+        if (visitBinaryFloatCall(I, ISD::FMAXIMUMNUM))
+          return;
+        break;
       case LibFunc_sin:
       case LibFunc_sinf:
       case LibFunc_sinl:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 46e8e54ee4ed7d..001f782f209fdb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -203,6 +203,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::STRICT_FMINIMUM:            return "strict_fminimum";
   case ISD::FMAXIMUM:                   return "fmaximum";
   case ISD::STRICT_FMAXIMUM:            return "strict_fmaximum";
+  case ISD::FMINIMUMNUM:                return "fminimumnum";
+  case ISD::FMAXIMUMNUM:                return "fmaximumnum";
   case ISD::FNEG:                       return "fneg";
   case ISD::FSQRT:                      return "fsqrt";
   case ISD::STRICT_FSQRT:               return "strict_fsqrt";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4cf1e655b00990..2c939967a5e1d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8558,6 +8558,94 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
   return MinMax;
 }
 
+SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Node);
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  unsigned Opc = Node->getOpcode();
+  EVT VT = Node->getValueType(0);
+  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  bool IsMax = Opc == ISD::FMAXIMUMNUM;
+  const TargetOptions &Options = DAG.getTarget().Options;
+  SDNodeFlags Flags = Node->getFlags();
+
+  unsigned NewOp =
+      Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
+
+  if (isOperationLegalOrCustom(NewOp, VT)) {
+    if (!Flags.hasNoNaNs()) {
+      // Insert canonicalizes if it's possible we need to quiet to get correct
+      // sNaN behavior.
+      if (!DAG.isKnownNeverSNaN(LHS)) {
+        LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags);
+      }
+      if (!DAG.isKnownNeverSNaN(RHS)) {
+        RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags);
+      }
+    }
+
+    return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags);
+  }
+
+  // We can use FMINIMUM/FMAXIMUM if neither operand can be NaN, since they
+  // behave identically in all other cases, +0.0 vs -0.0 included.
+  if (Flags.hasNoNaNs() ||
+      (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) {
+    unsigned IEEE2019Op =
+        Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+    if (isOperationLegalOrCustom(IEEE2019Op, VT))
+      return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags);
+  }
+
+  // FMINNUM/FMAXNUM return qNaN if either operand is sNaN, and they may return
+  // either one for +0.0 vs -0.0.
+  if ((Flags.hasNoNaNs() ||
+       (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) &&
+      (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) ||
+       DAG.isKnownNeverZeroFloat(RHS))) {
+    unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM;
+    if (isOperationLegalOrCustom(IEEE2008Op, VT))
+      return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags);
+  }
+
+  // If only one operand is NaN, override it with the other operand.
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) {
+    LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO);
+  }
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) {
+    RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO);
+  }
+
+  SDValue MinMax =
+      DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT);
+  // If MinMax is NaN, let's quiet it.
+  if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) &&
+      !DAG.isKnownNeverNaN(RHS)) {
+    SDValue MinMaxQuiet =
+        DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags);
+    MinMax =
+        DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO);
+  }
+
+  // Fixup signed zero behavior.
+  if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() ||
+      DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
+    return MinMax;
+  }
+  SDValue TestZero =
+      DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
+  SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
+                                DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
+  SDValue LCmp = DAG.getSelect(
+      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
+      MinMax, Flags);
+  SDValue RCmp = DAG.getSelect(
+      DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
+      Flags);
+  return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
+}
+
 /// Returns a true value if if this FPClassTest can be performed with an ordered
 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
 /// std::nullopt if it cannot be performed as a compare with 0.
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 149b5dabee0565..4ff8617f740c89 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -713,6 +713,7 @@ void TargetLoweringBase::initActions() {
                         ISD::FMINNUM,        ISD::FMAXNUM,
                         ISD::FMINNUM_IEEE,   ISD::FMAXNUM_IEEE,
                         ISD::FMINIMUM,       ISD::FMAXIMUM,
+                        ISD::FMINIMUMNUM,    ISD::FMAXIMUMNUM,
                         ISD::FMAD,           ISD::SMIN,
                         ISD::SMAX,           ISD::UMIN,
                         ISD::UMAX,           ISD::ABS,
diff --git a/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..b4fdd954b856c8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,431 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D
+; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+;
+; LA32F-LABEL: maximumnum_float_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_float_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_float_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_float_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+;
+; LA32F-LABEL: maximumnum_double_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fmaximum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: maximumnum_double_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: maximumnum_double_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fmaximum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: maximumnum_double_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64F-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.s $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.s $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+;
+; LA32F-LABEL: minimumnum_float_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_float_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_float_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_float_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmin.s $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double_nsz:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double_nsz:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA32D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double_nsz:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double_nsz:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmax.d $fa1, $fa1, $fa1
+; LA64D-NEXT:    fmax.d $fa0, $fa0, $fa0
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+;
+; LA32F-LABEL: minimumnum_double_nnan:
+; LA32F:       # %bb.0:
+; LA32F-NEXT:    addi.w $sp, $sp, -16
+; LA32F-NEXT:    .cfi_def_cfa_offset 16
+; LA32F-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32F-NEXT:    .cfi_offset 1, -4
+; LA32F-NEXT:    bl %plt(fminimum_num)
+; LA32F-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32F-NEXT:    addi.w $sp, $sp, 16
+; LA32F-NEXT:    ret
+;
+; LA32D-LABEL: minimumnum_double_nnan:
+; LA32D:       # %bb.0:
+; LA32D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA32D-NEXT:    ret
+;
+; LA64F-LABEL: minimumnum_double_nnan:
+; LA64F:       # %bb.0:
+; LA64F-NEXT:    addi.d $sp, $sp, -16
+; LA64F-NEXT:    .cfi_def_cfa_offset 16
+; LA64F-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64F-NEXT:    .cfi_offset 1, -8
+; LA64F-NEXT:    bl %plt(fminimum_num)
+; LA64F-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64F-NEXT:    addi.d $sp, $sp, 16
+; LA64F-NEXT:    ret
+;
+; LA64D-LABEL: minimumnum_double_nnan:
+; LA64D:       # %bb.0:
+; LA64D-NEXT:    fmin.d $fa0, $fa0, $fa1
+; LA64D-NEXT:    ret
+  %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
diff --git a/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll
new file mode 100644
index 00000000000000..bc81966ca0f5c9
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll
@@ -0,0 +1,132 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=mipsisa32r6 < %s | FileCheck %s --check-prefix=MIPS32R6
+
+declare float @llvm.maximumnum.f32(float, float)
+declare double @llvm.maximumnum.f64(double, double)
+declare float @llvm.minimumnum.f32(float, float)
+declare double @llvm.minimumnum.f64(double, double)
+
+define float @maximumnum_float(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f1, $f0
+  %z = call float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f1, $f0
+  %z = call nsz float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @maximumnum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: maximumnum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.s $f0, $f12, $f14
+  %z = call nnan float @llvm.maximumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+
+define double @maximumnum_double(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f1, $f0
+  %z = call double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f1, $f0
+  %z = call nsz double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @maximumnum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: maximumnum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    max.d $f0, $f12, $f14
+  %z = call nnan double @llvm.maximumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define float @minimumnum_float(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f1, $f0
+  %z = call float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nsz(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.s $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.s $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f1, $f0
+  %z = call nsz float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define float @minimumnum_float_nnan(float %x, float %y) {
+; MIPS32R6-LABEL: minimumnum_float_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.s $f0, $f12, $f14
+  %z = call nnan float @llvm.minimumnum.f32(float %x, float %y)
+  ret float %z
+}
+
+define double @minimumnum_double(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f1, $f0
+  %z = call double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nsz(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double_nsz:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    min.d $f0, $f14, $f14
+; MIPS32R6-NEXT:    min.d $f1, $f12, $f12
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f1, $f0
+  %z = call nsz double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
+
+define double @minimumnum_double_nnan(double %x, double %y) {
+; MIPS32R6-LABEL: minimumnum_double_nnan:
+; MIPS32R6:       # %bb.0:
+; MIPS32R6-NEXT:    jr $ra
+; MIPS32R6-NEXT:    min.d $f0, $f12, $f14
+  %z = call nnan double @llvm.minimumnum.f64(double %x, double %y)
+  ret double %z
+}
diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
index 9b37b49b3d49d2..cff5019f8e6ee4 100644
--- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
+++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml
@@ -32,15 +32,21 @@
 # RUN: FileCheck %s --check-prefix=AVAIL --input-file %t3.txt
 # RUN: FileCheck %s --check-prefix=UNAVAIL --input-file %t3.txt
 #
-# CHECK: << Total TLI yes SDK no:  12
+# CHECK: << Total TLI yes SDK no:  18
 # CHECK: >> Total TLI no  SDK yes: 0
 # CHECK: == Total TLI yes SDK yes: 248
 #
 # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*)
 # WRONG_DETAIL: >> TLI no  SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int)
-# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : '_Zn{{.*}}__hot_cold_t
+# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t
 # WRONG_DETAIL-COUNT-4: << TLI yes SDK no : '__size_returning_new{{.*}}
-# WRONG_SUMMARY: << Total TLI yes SDK no:  13{{$}}
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_num'
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numf'
+# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numl'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_num'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numf'
+# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl'
+# WRONG_SUMMARY: << Total TLI yes SDK no:  19{{$}}
 # WRONG_SUMMARY: >> Total TLI no  SDK yes: 1{{$}}
 # WRONG_SUMMARY: == Total TLI yes SDK yes: 247
 #
@@ -48,8 +54,8 @@
 ## the exact count first; the two directives should add up to that.
 ## Yes, this means additions to TLI will fail this test, but the argument
 ## to -COUNT can't be an expression.
-# AVAIL: TLI knows 493 symbols, 260 available
-# AVAIL-COUNT-260: {{^}} available
+# AVAIL: TLI knows 499 symbols, 266 available
+# AVAIL-COUNT-266: {{^}} available
 # AVAIL-NOT:       {{^}} available
 # UNAVAIL-COUNT-233: not available
 # UNAVAIL-NOT:       not available
diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
index 68bf8e670771ee..ff7dec5bee31df 100644
--- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
+++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp
@@ -212,6 +212,12 @@ TEST_F(TargetLibraryInfoTest, ValidProto) {
       "declare double @fmin(double, double)\n"
       "declare float @fminf(float, float)\n"
       "declare x86_fp80 @fminl(x86_fp80, x86_fp80)\n"
+      "declare double @fmaximum_num(double, double)\n"
+      "declare float @fmaximum_numf(float, float)\n"
+      "declare x86_fp80 @fmaximum_numl(x86_fp80, x86_fp80)\n"
+      "declare double @fminimum_num(double, double)\n"
+      "declare float @fminimum_numf(float, float)\n"
+      "declare x86_fp80 @fminimum_numl(x86_fp80, x86_fp80)\n"
       "declare double @fmod(double, double)\n"
       "declare float @fmodf(float, float)\n"
       "declare x86_fp80 @fmodl(x86_fp80, x86_fp80)\n"

From 8d037107287b85dcc8a0f0af75bd8ae5c07facb5 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Thu, 15 Aug 2024 08:17:22 +0200
Subject: [PATCH 28/47] [ctx_prof] Remove an unneeded include in
 CtxProfAnalysis.cpp

---
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 5bf336dd311158..7b4666b29a1936 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -21,7 +21,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 
 #define DEBUG_TYPE "ctx_prof"
 

From 372842b30f8e611765e3cb9f06b8265d2e79f3f6 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye@intel.com>
Date: Thu, 15 Aug 2024 14:18:59 +0800
Subject: [PATCH 29/47] [X86][MC] Remove CMPCCXADD's CondCode flavor. (#103898)

To align with gas's latest changes.
Related gas patch:
https://sourceware.org/pipermail/binutils/2024-May/134360.html
---
 clang/test/CodeGen/X86/cmpccxadd-builtins.c   |  48 +--
 .../X86/MCTargetDesc/X86InstPrinterCommon.cpp |  13 +-
 llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll | 120 ++++----
 .../MC/Disassembler/X86/apx/cmpccxadd.txt     |  80 ++---
 .../test/MC/Disassembler/X86/cmpccxadd-64.txt | 288 +++++++++---------
 llvm/test/MC/X86/apx/cmpccxadd-att.s          |  80 ++---
 llvm/test/MC/X86/apx/cmpccxadd-intel.s        |  80 ++---
 llvm/test/MC/X86/cmpccxadd-att-alias.s        |  26 +-
 llvm/test/MC/X86/cmpccxadd-att.s              | 288 +++++++++---------
 llvm/test/MC/X86/cmpccxadd-intel-alias.s      |  26 +-
 llvm/test/MC/X86/cmpccxadd-intel.s            | 288 +++++++++---------
 11 files changed, 668 insertions(+), 669 deletions(-)

diff --git a/clang/test/CodeGen/X86/cmpccxadd-builtins.c b/clang/test/CodeGen/X86/cmpccxadd-builtins.c
index 6daed3a1b17b67..f058dc9b2baa46 100644
--- a/clang/test/CodeGen/X86/cmpccxadd-builtins.c
+++ b/clang/test/CodeGen/X86/cmpccxadd-builtins.c
@@ -52,50 +52,50 @@ long long test_cmplxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NB);
 }
 
-int test_cmpnbexadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnbexadd32(
+int test_cmpaxadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpaxadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 4)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_Z);
 }
 
-long long test_cmpnbexadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnbexadd64(
+long long test_cmpaxadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpaxadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 4)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_Z);
 }
 
-int test_cmpnbxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnbxadd32(
+int test_cmpaexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpaexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 5)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NZ);
 }
 
-long long test_cmpnbxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnbxadd64(
+long long test_cmpaexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpaexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 5)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NZ);
 }
 
-int test_cmpnlexadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnlexadd32(
+int test_cmpgxadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpgxadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 6)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_BE);
 }
 
-long long test_cmpnlexadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnlexadd64(
+long long test_cmpgxadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpgxadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 6)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_BE);
 }
 
-int test_cmpnlxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnlxadd32(
+int test_cmpgexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpgexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 7)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NBE);
 }
 
-long long test_cmpnlxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnlxadd64(
+long long test_cmpgexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpgexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 7)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NBE);
 }
@@ -136,14 +136,14 @@ long long test_cmpnsxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_P);
 }
 
-int test_cmpnzxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpnzxadd32(
+int test_cmpnexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpnexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 11)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NP);
 }
 
-long long test_cmpnzxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpnzxadd64(
+long long test_cmpnexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpnexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 11)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NP);
 }
@@ -184,14 +184,14 @@ long long test_cmpsxadd64(void *__A, long long __B, long long __C) {
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_LE);
 }
 
-int test_cmpzxadd32(void *__A, int __B, int __C) {
-  // CHECK-LABEL: @test_cmpzxadd32(
+int test_cmpexadd32(void *__A, int __B, int __C) {
+  // CHECK-LABEL: @test_cmpexadd32(
   // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 15)
   return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NLE);
 }
 
-long long test_cmpzxadd64(void *__A, long long __B, long long __C) {
-  // CHECK-LABEL: @test_cmpzxadd64(
+long long test_cmpexadd64(void *__A, long long __B, long long __C) {
+  // CHECK-LABEL: @test_cmpexadd64(
   // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 15)
   return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NLE);
 }
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
index 21c1556d1d8ed2..ad1f2dc532d1c2 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp
@@ -30,7 +30,6 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
                                          raw_ostream &O) {
   int64_t Imm = MI->getOperand(Op).getImm();
   unsigned Opc = MI->getOpcode();
-  bool IsCMPCCXADD = X86::isCMPCCXADD(Opc);
   bool IsCCMPOrCTEST = X86::isCCMPCC(Opc) || X86::isCTESTCC(Opc);
 
   // clang-format off
@@ -39,19 +38,19 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op,
   case    0: O << "o";  break;
   case    1: O << "no"; break;
   case    2: O << "b";  break;
-  case    3: O << (IsCMPCCXADD ? "nb" : "ae"); break;
-  case    4: O << (IsCMPCCXADD ?  "z" :  "e"); break;
-  case    5: O << (IsCMPCCXADD ? "nz" : "ne"); break;
+  case    3: O << "ae"; break;
+  case    4: O << "e";  break;
+  case    5: O << "ne"; break;
   case    6: O << "be"; break;
-  case    7: O << (IsCMPCCXADD ? "nbe" : "a"); break;
+  case    7: O << "a";  break;
   case    8: O << "s";  break;
   case    9: O << "ns"; break;
   case  0xa: O << (IsCCMPOrCTEST ? "t" : "p");  break;
   case  0xb: O << (IsCCMPOrCTEST ? "f" : "np"); break;
   case  0xc: O << "l";  break;
-  case  0xd: O << (IsCMPCCXADD ? "nl" : "ge"); break;
+  case  0xd: O << "ge"; break;
   case  0xe: O << "le"; break;
-  case  0xf: O << (IsCMPCCXADD ? "nle" : "g"); break;
+  case  0xf: O << "g";  break;
   }
   // clang-format on
 }
diff --git a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
index f88216f95a7614..561289c1b77465 100644
--- a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll
@@ -112,13 +112,13 @@ define dso_local i32 @test_cmplxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
 ; CHECK-LABEL: test_cmplxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnbxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07]
+; CHECK-NEXT:    cmpaexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmplxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnbxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07]
+; EGPR-NEXT:    cmpaexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 3)
@@ -129,95 +129,95 @@ define dso_local i64 @test_cmplxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
 ; CHECK-LABEL: test_cmplxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnbxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
+; CHECK-NEXT:    cmpaexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmplxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnbxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
+; EGPR-NEXT:    cmpaexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 3)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnbexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbexadd32:
+define dso_local i32 @test_cmpaxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpaxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07]
+; CHECK-NEXT:    cmpexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbexadd32:
+; EGPR-LABEL: test_cmpaxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07]
+; EGPR-NEXT:    cmpexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 4)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnbexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbexadd64:
+define dso_local i64 @test_cmpaxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpaxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
+; CHECK-NEXT:    cmpexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbexadd64:
+; EGPR-LABEL: test_cmpaxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
+; EGPR-NEXT:    cmpexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 4)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnbxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbxadd32:
+define dso_local i32 @test_cmpaexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpaexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07]
+; CHECK-NEXT:    cmpnexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbxadd32:
+; EGPR-LABEL: test_cmpaexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07]
+; EGPR-NEXT:    cmpnexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 5)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnbxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnbxadd64:
+define dso_local i64 @test_cmpaexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpaexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
+; CHECK-NEXT:    cmpnexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnbxadd64:
+; EGPR-LABEL: test_cmpaexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
+; EGPR-NEXT:    cmpnexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 5)
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnlexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlexadd32:
+define dso_local i32 @test_cmpgxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpgxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; CHECK-NEXT:    cmpbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe6,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlexadd32:
+; EGPR-LABEL: test_cmpgxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; EGPR-NEXT:    cmpbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe6,0x07]
@@ -227,14 +227,14 @@ entry:
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnlexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlexadd64:
+define dso_local i64 @test_cmpgxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpgxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; CHECK-NEXT:    cmpbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe6,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlexadd64:
+; EGPR-LABEL: test_cmpgxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; EGPR-NEXT:    cmpbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe6,0x07]
@@ -244,34 +244,34 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnlxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlxadd32:
+define dso_local i32 @test_cmpgexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpgexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07]
+; CHECK-NEXT:    cmpaxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlxadd32:
+; EGPR-LABEL: test_cmpgexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07]
+; EGPR-NEXT:    cmpaxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 7)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnlxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnlxadd64:
+define dso_local i64 @test_cmpgexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpgexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
+; CHECK-NEXT:    cmpaxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnlxadd64:
+; EGPR-LABEL: test_cmpgexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
+; EGPR-NEXT:    cmpaxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 7)
@@ -380,14 +380,14 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpnzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpnzxadd32:
+define dso_local i32 @test_cmpnexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpnexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; CHECK-NEXT:    cmpnpxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xeb,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnzxadd32:
+; EGPR-LABEL: test_cmpnexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
 ; EGPR-NEXT:    cmpnpxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xeb,0x07]
@@ -397,14 +397,14 @@ entry:
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpnzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpnzxadd64:
+define dso_local i64 @test_cmpnexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpnexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; CHECK-NEXT:    cmpnpxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xeb,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpnzxadd64:
+; EGPR-LABEL: test_cmpnexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
 ; EGPR-NEXT:    cmpnpxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xeb,0x07]
@@ -452,13 +452,13 @@ define dso_local i32 @test_cmppxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
 ; CHECK-LABEL: test_cmppxadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnlxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07]
+; CHECK-NEXT:    cmpgexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmppxadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnlxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07]
+; EGPR-NEXT:    cmpgexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 13)
@@ -469,13 +469,13 @@ define dso_local i64 @test_cmppxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
 ; CHECK-LABEL: test_cmppxadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnlxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07]
+; CHECK-NEXT:    cmpgexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
 ; EGPR-LABEL: test_cmppxadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnlxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07]
+; EGPR-NEXT:    cmpgexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 13)
@@ -516,34 +516,34 @@ entry:
   ret i64 %0
 }
 
-define dso_local i32 @test_cmpzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
-; CHECK-LABEL: test_cmpzxadd32:
+define dso_local i32 @test_cmpexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind {
+; CHECK-LABEL: test_cmpexadd32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; CHECK-NEXT:    cmpnlexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07]
+; CHECK-NEXT:    cmpgxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpzxadd32:
+; EGPR-LABEL: test_cmpexadd32:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movl %esi, %eax # encoding: [0x89,0xf0]
-; EGPR-NEXT:    cmpnlexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07]
+; EGPR-NEXT:    cmpgxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 15)
   ret i32 %0
 }
 
-define dso_local i64 @test_cmpzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
-; CHECK-LABEL: test_cmpzxadd64:
+define dso_local i64 @test_cmpexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind {
+; CHECK-LABEL: test_cmpexadd64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; CHECK-NEXT:    cmpnlexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07]
+; CHECK-NEXT:    cmpgxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07]
 ; CHECK-NEXT:    retq # encoding: [0xc3]
 ;
-; EGPR-LABEL: test_cmpzxadd64:
+; EGPR-LABEL: test_cmpexadd64:
 ; EGPR:       # %bb.0: # %entry
 ; EGPR-NEXT:    movq %rsi, %rax # encoding: [0x48,0x89,0xf0]
-; EGPR-NEXT:    cmpnlexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07]
+; EGPR-NEXT:    cmpgxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07]
 ; EGPR-NEXT:    retq # encoding: [0xc3]
 entry:
   %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 15)
diff --git a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
index 2a54bebd5212c9..7a2e09af5b3db3 100644
--- a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
+++ b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt
@@ -1,20 +1,20 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
 # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:   cmpnbexadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpaxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b
 
-# ATT:   cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b
 
-# ATT:   cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmpbexadd	%ecx, %edx, 123(%rax,%rbx,4)
@@ -49,52 +49,52 @@
 # INTEL: cmpbxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b
 
-# ATT:   cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b
 
-# ATT:   cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b
 
-# ATT:   cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b
 
-# ATT:   cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b
 
-# ATT:   cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b
 
-# ATT:   cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmplexadd	%ecx, %edx, 123(%rax,%rbx,4)
@@ -129,20 +129,20 @@
 # INTEL: cmplxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
-# INTEL: cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# ATT:   cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# INTEL: cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b
 
-# ATT:   cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
-# INTEL: cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# ATT:   cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
+# INTEL: cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b
 
-# ATT:   cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
-# INTEL: cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# ATT:   cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# INTEL: cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
-# INTEL: cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# ATT:   cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
+# INTEL: cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00
 
 # ATT:   cmpnoxadd	%ecx, %edx, 123(%rax,%rbx,4)
diff --git a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
index 62420db37f40d7..7b1599de263263 100644
--- a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
+++ b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt
@@ -193,196 +193,196 @@
 # INTEL: cmplxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnbexadd %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbexadd %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %eax, %ecx, (%rip)
-# INTEL: cmpnbexadd dword ptr [rip], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, (%rip)
+# INTEL: cmpaxadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpaxadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnbexadd %eax, %ecx, 508(%rcx)
-# INTEL: cmpnbexadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, 508(%rcx)
+# INTEL: cmpaxadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %eax, %ecx, -512(%rdx)
-# INTEL: cmpnbexadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpaxadd %eax, %ecx, -512(%rdx)
+# INTEL: cmpaxadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbexadd %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpaxadd %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, (%rip)
-# INTEL: cmpnbexadd qword ptr [rip], r9, r10
+# ATT:   cmpaxadd %r10, %r9, (%rip)
+# INTEL: cmpaxadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpaxadd %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpaxadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnbexadd %r10, %r9, 1016(%rcx)
-# INTEL: cmpnbexadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpaxadd %r10, %r9, 1016(%rcx)
+# INTEL: cmpaxadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnbexadd %r10, %r9, -1024(%rdx)
-# INTEL: cmpnbexadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpaxadd %r10, %r9, -1024(%rdx)
+# INTEL: cmpaxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnbxadd %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbxadd %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, (%rip)
-# INTEL: cmpnbxadd dword ptr [rip], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, (%rip)
+# INTEL: cmpaexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnbxadd %eax, %ecx, 508(%rcx)
-# INTEL: cmpnbxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, 508(%rcx)
+# INTEL: cmpaexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %eax, %ecx, -512(%rdx)
-# INTEL: cmpnbxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpaexadd %eax, %ecx, -512(%rdx)
+# INTEL: cmpaexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnbxadd %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnbxadd %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, (%rip)
-# INTEL: cmpnbxadd qword ptr [rip], r9, r10
+# ATT:   cmpaexadd %r10, %r9, (%rip)
+# INTEL: cmpaexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpaexadd %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpaexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnbxadd %r10, %r9, 1016(%rcx)
-# INTEL: cmpnbxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpaexadd %r10, %r9, 1016(%rcx)
+# INTEL: cmpaexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnbxadd %r10, %r9, -1024(%rdx)
-# INTEL: cmpnbxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpaexadd %r10, %r9, -1024(%rdx)
+# INTEL: cmpaexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, (%rip)
-# INTEL: cmpnlexadd dword ptr [rip], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, (%rip)
+# INTEL: cmpgxadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnlexadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnlexadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpgxadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnlexadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpgxadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpgxadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, (%rip)
-# INTEL: cmpnlexadd qword ptr [rip], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, (%rip)
+# INTEL: cmpgxadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpgxadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnlexadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnlexadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpgxadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnlexadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnlexadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpgxadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpgxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, (%rip)
-# INTEL: cmpnlxadd dword ptr [rip], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, (%rip)
+# INTEL: cmpgexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnlxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnlxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpgexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnlxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpgexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpgexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, (%rip)
-# INTEL: cmpnlxadd qword ptr [rip], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, (%rip)
+# INTEL: cmpgexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpgexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnlxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnlxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpgexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnlxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnlxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpgexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpgexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpnoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
@@ -529,52 +529,52 @@
 # INTEL: cmpnsxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, (%rip)
-# INTEL: cmpnzxadd dword ptr [rip], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, (%rip)
+# INTEL: cmpnexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpnzxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpnzxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpnexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpnzxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpnexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpnexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, (%rip)
-# INTEL: cmpnzxadd qword ptr [rip], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, (%rip)
+# INTEL: cmpnexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpnexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpnzxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpnzxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpnexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpnzxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpnzxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpnexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpnexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
@@ -721,52 +721,52 @@
 # INTEL: cmpsxadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff
 
-# ATT:   cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
-# INTEL: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+# INTEL: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
-# INTEL: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
+# INTEL: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, (%rip)
-# INTEL: cmpzxadd dword ptr [rip], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, (%rip)
+# INTEL: cmpexadd dword ptr [rip], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, -128(,%rbp,2)
-# INTEL: cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, -128(,%rbp,2)
+# INTEL: cmpexadd dword ptr [2*rbp - 128], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff
 
-# ATT:   cmpzxadd  %eax, %ecx, 508(%rcx)
-# INTEL: cmpzxadd dword ptr [rcx + 508], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, 508(%rcx)
+# INTEL: cmpexadd dword ptr [rcx + 508], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %eax, %ecx, -512(%rdx)
-# INTEL: cmpzxadd dword ptr [rdx - 512], ecx, eax
+# ATT:   cmpexadd  %eax, %ecx, -512(%rdx)
+# INTEL: cmpexadd dword ptr [rdx - 512], ecx, eax
 0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff
 
-# ATT:   cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
-# INTEL: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+# INTEL: cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:   cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
-# INTEL: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 291(%r8,%rax,4)
+# INTEL: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, (%rip)
-# INTEL: cmpzxadd qword ptr [rip], r9, r10
+# ATT:   cmpexadd  %r10, %r9, (%rip)
+# INTEL: cmpexadd qword ptr [rip], r9, r10
 0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, -256(,%rbp,2)
-# INTEL: cmpzxadd qword ptr [2*rbp - 256], r9, r10
+# ATT:   cmpexadd  %r10, %r9, -256(,%rbp,2)
+# INTEL: cmpexadd qword ptr [2*rbp - 256], r9, r10
 0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff
 
-# ATT:   cmpzxadd  %r10, %r9, 1016(%rcx)
-# INTEL: cmpzxadd qword ptr [rcx + 1016], r9, r10
+# ATT:   cmpexadd  %r10, %r9, 1016(%rcx)
+# INTEL: cmpexadd qword ptr [rcx + 1016], r9, r10
 0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00
 
-# ATT:   cmpzxadd  %r10, %r9, -1024(%rdx)
-# INTEL: cmpzxadd qword ptr [rdx - 1024], r9, r10
+# ATT:   cmpexadd  %r10, %r9, -1024(%rdx)
+# INTEL: cmpexadd qword ptr [rdx - 1024], r9, r10
 0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff
 
 # ATT:   cmpbexadd  %ecx, %r8d, (%rip)
diff --git a/llvm/test/MC/X86/apx/cmpccxadd-att.s b/llvm/test/MC/X86/apx/cmpccxadd-att.s
index d6ade869ca1d26..544871274a41d1 100644
--- a/llvm/test/MC/X86/apx/cmpccxadd-att.s
+++ b/llvm/test/MC/X86/apx/cmpccxadd-att.s
@@ -3,21 +3,21 @@
 
 # ERROR-COUNT-60: error:
 # ERROR-NOT: error:
-# CHECK: {evex}	cmpnbexadd	%ecx, %edx, 123(%eax,%ebx,4)
+# CHECK: {evex}	cmpaxadd	%ecx, %edx, 123(%eax,%ebx,4)
 # CHECK: encoding: [0x67,0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b]
-         {evex}	cmpnbexadd	%ecx, %edx, 123(%eax,%ebx,4)
+         {evex}	cmpaxadd	%ecx, %edx, 123(%eax,%ebx,4)
 
-# CHECK: {evex}	cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b]
-         {evex}	cmpnbexadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpaxadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpaxadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpaxadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmpbexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b]
@@ -51,53 +51,53 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmpbxadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b]
-         {evex}	cmpzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b]
-         {evex}	cmpzxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpexadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b]
-         {evex}	cmpnlxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpgexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b]
-         {evex}	cmpnlxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpgexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpgexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpgexadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b]
-         {evex}	cmpnlexadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpgxadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b]
-         {evex}	cmpnlexadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpgxadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpgxadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpgxadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmplexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b]
@@ -131,21 +131,21 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmplxadd	%r19, %r23, 291(%r28,%r29,4)
 
-# CHECK: {evex}	cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b]
-         {evex}	cmpnzxadd	%ecx, %edx, 123(%rax,%rbx,4)
+         {evex}	cmpnexadd	%ecx, %edx, 123(%rax,%rbx,4)
 
-# CHECK: {evex}	cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
+# CHECK: {evex}	cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b]
-         {evex}	cmpnzxadd	%r9, %r15, 123(%rax,%rbx,4)
+         {evex}	cmpnexadd	%r9, %r15, 123(%rax,%rbx,4)
 
-# CHECK: cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+# CHECK: cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	%r18d, %r22d, 291(%r28,%r29,4)
+         cmpnexadd	%r18d, %r22d, 291(%r28,%r29,4)
 
-# CHECK: cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
+# CHECK: cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	%r19, %r23, 291(%r28,%r29,4)
+         cmpnexadd	%r19, %r23, 291(%r28,%r29,4)
 
 # CHECK: {evex}	cmpnoxadd	%ecx, %edx, 123(%rax,%rbx,4)
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b]
diff --git a/llvm/test/MC/X86/apx/cmpccxadd-intel.s b/llvm/test/MC/X86/apx/cmpccxadd-intel.s
index 4c44968fbf91ce..cace33e59d6a74 100644
--- a/llvm/test/MC/X86/apx/cmpccxadd-intel.s
+++ b/llvm/test/MC/X86/apx/cmpccxadd-intel.s
@@ -1,20 +1,20 @@
 # RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-# CHECK: {evex}	cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b]
-         {evex}	cmpnbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpaxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b]
-         {evex}	cmpnbexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpaxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpaxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnbexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpaxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmpbexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b]
@@ -48,53 +48,53 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmpbxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b]
-         {evex}	cmpzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b]
-         {evex}	cmpzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b]
-         {evex}	cmpnlxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpgexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b]
-         {evex}	cmpnlxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpgexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpgexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpgexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b]
-         {evex}	cmpnlexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpgxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b]
-         {evex}	cmpnlexadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpgxadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpgxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnlexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpgxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmplexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b]
@@ -128,21 +128,21 @@
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00]
          cmplxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
-# CHECK: {evex}	cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+# CHECK: {evex}	cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b]
-         {evex}	cmpnzxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
+         {evex}	cmpnexadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 
-# CHECK: {evex}	cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+# CHECK: {evex}	cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b]
-         {evex}	cmpnzxadd	qword ptr [rax + 4*rbx + 123], r15, r9
+         {evex}	cmpnexadd	qword ptr [rax + 4*rbx + 123], r15, r9
 
-# CHECK: cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+# CHECK: cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
+         cmpnexadd	dword ptr [r28 + 4*r29 + 291], r22d, r18d
 
-# CHECK: cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+# CHECK: cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00]
-         cmpnzxadd	qword ptr [r28 + 4*r29 + 291], r23, r19
+         cmpnexadd	qword ptr [r28 + 4*r29 + 291], r23, r19
 
 # CHECK: {evex}	cmpnoxadd	dword ptr [rax + 4*rbx + 123], edx, ecx
 # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b]
diff --git a/llvm/test/MC/X86/cmpccxadd-att-alias.s b/llvm/test/MC/X86/cmpccxadd-att-alias.s
index dcc0f105d7abc1..46c6588740b9cd 100644
--- a/llvm/test/MC/X86/cmpccxadd-att-alias.s
+++ b/llvm/test/MC/X86/cmpccxadd-att-alias.s
@@ -1,28 +1,28 @@
 // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpaexadd  %eax, %ecx, (%rip)
+          cmpnbxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpexadd  %eax, %ecx, (%rip)
+          cmpzxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpnexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnexadd  %eax, %ecx, (%rip)
+          cmpnzxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbexadd  %eax, %ecx, (%rip)
+// CHECK: cmpaxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpaxadd  %eax, %ecx, (%rip)
+          cmpnbexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlxadd  %eax, %ecx, (%rip)
+// CHECK: cmpgexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpgexadd  %eax, %ecx, (%rip)
+          cmpnlxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlexadd  %eax, %ecx, (%rip)
+// CHECK: cmpgxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpgxadd  %eax, %ecx, (%rip)
+          cmpnlexadd  %eax, %ecx, (%rip)
 
 // CHECK: cmpbxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
@@ -32,7 +32,7 @@
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
           cmpnaexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
           cmpncxadd  %eax, %ecx, (%rip)
 
diff --git a/llvm/test/MC/X86/cmpccxadd-att.s b/llvm/test/MC/X86/cmpccxadd-att.s
index c79cc55a15b81d..a7c9df91ab0c8e 100644
--- a/llvm/test/MC/X86/cmpccxadd-att.s
+++ b/llvm/test/MC/X86/cmpccxadd-att.s
@@ -196,197 +196,197 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff]
           cmplxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpaxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpaxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpaxadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpaxadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnbexadd  %eax, %ecx, (%rip)
+// CHECK: cmpaxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, (%rip)
+          cmpaxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpaxadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbexadd  %eax, %ecx, -128(,%rbp,2)
+          cmpaxadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnbexadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpaxadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbexadd  %eax, %ecx, 508(%rcx)
+          cmpaxadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnbexadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpaxadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbexadd  %eax, %ecx, -512(%rdx)
+          cmpaxadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpaxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbexadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpaxadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpaxadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnbexadd  %r10, %r9, (%rip)
+// CHECK: cmpaxadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd  %r10, %r9, (%rip)
+          cmpaxadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnbexadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpaxadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbexadd  %r10, %r9, -256(,%rbp,2)
+          cmpaxadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnbexadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpaxadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbexadd  %r10, %r9, 1016(%rcx)
+          cmpaxadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnbexadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpaxadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbexadd  %r10, %r9, -1024(%rdx)
+          cmpaxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpaexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpaexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpaexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpaexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnbxadd  %eax, %ecx, (%rip)
+// CHECK: cmpaexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, (%rip)
+          cmpaexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnbxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpaexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpaexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnbxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpaexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbxadd  %eax, %ecx, 508(%rcx)
+          cmpaexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnbxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpaexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbxadd  %eax, %ecx, -512(%rdx)
+          cmpaexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnbxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpaexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpaexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnbxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpaexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpaexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnbxadd  %r10, %r9, (%rip)
+// CHECK: cmpaexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd  %r10, %r9, (%rip)
+          cmpaexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnbxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpaexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbxadd  %r10, %r9, -256(,%rbp,2)
+          cmpaexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnbxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpaexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbxadd  %r10, %r9, 1016(%rcx)
+          cmpaexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnbxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpaexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbxadd  %r10, %r9, -1024(%rdx)
+          cmpaexadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpgxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpgxadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnlexadd  %eax, %ecx, (%rip)
+// CHECK: cmpgxadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, (%rip)
+          cmpgxadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpgxadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlexadd  %eax, %ecx, -128(,%rbp,2)
+          cmpgxadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnlexadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpgxadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlexadd  %eax, %ecx, 508(%rcx)
+          cmpgxadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnlexadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpgxadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlexadd  %eax, %ecx, -512(%rdx)
+          cmpgxadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpgxadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpgxadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnlexadd  %r10, %r9, (%rip)
+// CHECK: cmpgxadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd  %r10, %r9, (%rip)
+          cmpgxadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnlexadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpgxadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlexadd  %r10, %r9, -256(,%rbp,2)
+          cmpgxadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnlexadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpgxadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlexadd  %r10, %r9, 1016(%rcx)
+          cmpgxadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnlexadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpgxadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlexadd  %r10, %r9, -1024(%rdx)
+          cmpgxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpgexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpgexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnlxadd  %eax, %ecx, (%rip)
+// CHECK: cmpgexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, (%rip)
+          cmpgexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpgexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpgexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnlxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpgexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlxadd  %eax, %ecx, 508(%rcx)
+          cmpgexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnlxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpgexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlxadd  %eax, %ecx, -512(%rdx)
+          cmpgexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpgexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpgexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnlxadd  %r10, %r9, (%rip)
+// CHECK: cmpgexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd  %r10, %r9, (%rip)
+          cmpgexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnlxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpgexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlxadd  %r10, %r9, -256(,%rbp,2)
+          cmpgexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnlxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpgexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlxadd  %r10, %r9, 1016(%rcx)
+          cmpgexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnlxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpgexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlxadd  %r10, %r9, -1024(%rdx)
+          cmpgexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpnoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -532,53 +532,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff]
           cmpnsxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpnexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpnexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpnzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpnexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, (%rip)
+          cmpnexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpnexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnzxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpnexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpnzxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpnexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00]
-          cmpnzxadd  %eax, %ecx, 508(%rcx)
+          cmpnexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpnzxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpnexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnzxadd  %eax, %ecx, -512(%rdx)
+          cmpnexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpnexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpnexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpnzxadd  %r10, %r9, (%rip)
+// CHECK: cmpnexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd  %r10, %r9, (%rip)
+          cmpnexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpnzxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpnexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnzxadd  %r10, %r9, -256(,%rbp,2)
+          cmpnexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpnzxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpnexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00]
-          cmpnzxadd  %r10, %r9, 1016(%rcx)
+          cmpnexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpnzxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpnexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnzxadd  %r10, %r9, -1024(%rdx)
+          cmpnexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpoxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -724,53 +724,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff]
           cmpsxadd  %r10, %r9, -1024(%rdx)
 
-// CHECK: cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+// CHECK: cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd  %eax, %ecx, 268435456(%rbp,%r14,8)
+          cmpexadd  %eax, %ecx, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
+// CHECK: cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd  %eax, %ecx, 291(%r8,%rax,4)
+          cmpexadd  %eax, %ecx, 291(%r8,%rax,4)
 
-// CHECK: cmpzxadd  %eax, %ecx, (%rip)
+// CHECK: cmpexadd  %eax, %ecx, (%rip)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd  %eax, %ecx, (%rip)
+          cmpexadd  %eax, %ecx, (%rip)
 
-// CHECK: cmpzxadd  %eax, %ecx, -128(,%rbp,2)
+// CHECK: cmpexadd  %eax, %ecx, -128(,%rbp,2)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpzxadd  %eax, %ecx, -128(,%rbp,2)
+          cmpexadd  %eax, %ecx, -128(,%rbp,2)
 
-// CHECK: cmpzxadd  %eax, %ecx, 508(%rcx)
+// CHECK: cmpexadd  %eax, %ecx, 508(%rcx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00]
-          cmpzxadd  %eax, %ecx, 508(%rcx)
+          cmpexadd  %eax, %ecx, 508(%rcx)
 
-// CHECK: cmpzxadd  %eax, %ecx, -512(%rdx)
+// CHECK: cmpexadd  %eax, %ecx, -512(%rdx)
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff]
-          cmpzxadd  %eax, %ecx, -512(%rdx)
+          cmpexadd  %eax, %ecx, -512(%rdx)
 
-// CHECK: cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+// CHECK: cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd  %r10, %r9, 268435456(%rbp,%r14,8)
+          cmpexadd  %r10, %r9, 268435456(%rbp,%r14,8)
 
-// CHECK: cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
+// CHECK: cmpexadd  %r10, %r9, 291(%r8,%rax,4)
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd  %r10, %r9, 291(%r8,%rax,4)
+          cmpexadd  %r10, %r9, 291(%r8,%rax,4)
 
-// CHECK: cmpzxadd  %r10, %r9, (%rip)
+// CHECK: cmpexadd  %r10, %r9, (%rip)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd  %r10, %r9, (%rip)
+          cmpexadd  %r10, %r9, (%rip)
 
-// CHECK: cmpzxadd  %r10, %r9, -256(,%rbp,2)
+// CHECK: cmpexadd  %r10, %r9, -256(,%rbp,2)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpzxadd  %r10, %r9, -256(,%rbp,2)
+          cmpexadd  %r10, %r9, -256(,%rbp,2)
 
-// CHECK: cmpzxadd  %r10, %r9, 1016(%rcx)
+// CHECK: cmpexadd  %r10, %r9, 1016(%rcx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00]
-          cmpzxadd  %r10, %r9, 1016(%rcx)
+          cmpexadd  %r10, %r9, 1016(%rcx)
 
-// CHECK: cmpzxadd  %r10, %r9, -1024(%rdx)
+// CHECK: cmpexadd  %r10, %r9, -1024(%rdx)
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff]
-          cmpzxadd  %r10, %r9, -1024(%rdx)
+          cmpexadd  %r10, %r9, -1024(%rdx)
 
 // CHECK: cmpbexadd  %ecx, %r8d, (%rip)
 // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/MC/X86/cmpccxadd-intel-alias.s b/llvm/test/MC/X86/cmpccxadd-intel-alias.s
index f5c7a6b6a2e0a5..6228d7fc67231d 100644
--- a/llvm/test/MC/X86/cmpccxadd-intel-alias.s
+++ b/llvm/test/MC/X86/cmpccxadd-intel-alias.s
@@ -1,28 +1,28 @@
 // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: cmpnbxadd dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpaexadd dword ptr [rip], ecx, eax
+          cmpnbxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpexadd dword ptr [rip], ecx, eax
+          cmpzxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpnexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnexadd dword ptr [rip], ecx, eax
+          cmpnzxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rip], ecx, eax
+// CHECK: cmpaxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpaxadd dword ptr [rip], ecx, eax
+          cmpnbexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rip], ecx, eax
+// CHECK: cmpgexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpgexadd dword ptr [rip], ecx, eax
+          cmpnlxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rip], ecx, eax
+// CHECK: cmpgxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpgxadd dword ptr [rip], ecx, eax
+          cmpnlexadd dword ptr [rip], ecx, eax
 
 // CHECK: cmpbxadd  dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
@@ -32,7 +32,7 @@
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00]
           cmpnaexadd  dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbxadd  dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd  dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
           cmpncxadd  dword ptr [rip], ecx, eax
 
diff --git a/llvm/test/MC/X86/cmpccxadd-intel.s b/llvm/test/MC/X86/cmpccxadd-intel.s
index c03873e34decea..af7c6c3b61c949 100644
--- a/llvm/test/MC/X86/cmpccxadd-intel.s
+++ b/llvm/test/MC/X86/cmpccxadd-intel.s
@@ -192,197 +192,197 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff]
           cmplxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rip], ecx, eax
+// CHECK: cmpaxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd dword ptr [rip], ecx, eax
+          cmpaxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbexadd dword ptr [2*rbp - 128], ecx, eax
+          cmpaxadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpaxadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbexadd dword ptr [rcx + 508], ecx, eax
+          cmpaxadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnbexadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpaxadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbexadd dword ptr [rdx - 512], ecx, eax
+          cmpaxadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rip], r9, r10
+// CHECK: cmpaxadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbexadd qword ptr [rip], r9, r10
+          cmpaxadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpaxadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbexadd qword ptr [2*rbp - 256], r9, r10
+          cmpaxadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpaxadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbexadd qword ptr [rcx + 1016], r9, r10
+          cmpaxadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnbexadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpaxadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbexadd qword ptr [rdx - 1024], r9, r10
+          cmpaxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rip], ecx, eax
+// CHECK: cmpaexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd dword ptr [rip], ecx, eax
+          cmpaexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnbxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpaexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpaexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00]
-          cmpnbxadd dword ptr [rcx + 508], ecx, eax
+          cmpaexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnbxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpaexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnbxadd dword ptr [rdx - 512], ecx, eax
+          cmpaexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rip], r9, r10
+// CHECK: cmpaexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00]
-          cmpnbxadd qword ptr [rip], r9, r10
+          cmpaexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpaexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnbxadd qword ptr [2*rbp - 256], r9, r10
+          cmpaexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpaexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00]
-          cmpnbxadd qword ptr [rcx + 1016], r9, r10
+          cmpaexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnbxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpaexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnbxadd qword ptr [rdx - 1024], r9, r10
+          cmpaexadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rip], ecx, eax
+// CHECK: cmpgxadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd dword ptr [rip], ecx, eax
+          cmpgxadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlexadd dword ptr [2*rbp - 128], ecx, eax
+          cmpgxadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpgxadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlexadd dword ptr [rcx + 508], ecx, eax
+          cmpgxadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnlexadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpgxadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlexadd dword ptr [rdx - 512], ecx, eax
+          cmpgxadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rip], r9, r10
+// CHECK: cmpgxadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlexadd qword ptr [rip], r9, r10
+          cmpgxadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpgxadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlexadd qword ptr [2*rbp - 256], r9, r10
+          cmpgxadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpgxadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlexadd qword ptr [rcx + 1016], r9, r10
+          cmpgxadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnlexadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpgxadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlexadd qword ptr [rdx - 1024], r9, r10
+          cmpgxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rip], ecx, eax
+// CHECK: cmpgexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd dword ptr [rip], ecx, eax
+          cmpgexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnlxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpgexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpgexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00]
-          cmpnlxadd dword ptr [rcx + 508], ecx, eax
+          cmpgexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnlxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpgexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnlxadd dword ptr [rdx - 512], ecx, eax
+          cmpgexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rip], r9, r10
+// CHECK: cmpgexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00]
-          cmpnlxadd qword ptr [rip], r9, r10
+          cmpgexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpgexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnlxadd qword ptr [2*rbp - 256], r9, r10
+          cmpgexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpgexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00]
-          cmpnlxadd qword ptr [rcx + 1016], r9, r10
+          cmpgexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnlxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpgexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnlxadd qword ptr [rdx - 1024], r9, r10
+          cmpgexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpnoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -528,53 +528,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff]
           cmpnsxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpnexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd dword ptr [rip], ecx, eax
+          cmpnexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpnzxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpnexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpnexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00]
-          cmpnzxadd dword ptr [rcx + 508], ecx, eax
+          cmpnexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpnzxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpnexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff]
-          cmpnzxadd dword ptr [rdx - 512], ecx, eax
+          cmpnexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rip], r9, r10
+// CHECK: cmpnexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00]
-          cmpnzxadd qword ptr [rip], r9, r10
+          cmpnexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpnexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpnzxadd qword ptr [2*rbp - 256], r9, r10
+          cmpnexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpnexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00]
-          cmpnzxadd qword ptr [rcx + 1016], r9, r10
+          cmpnexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpnzxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpnexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff]
-          cmpnzxadd qword ptr [rdx - 1024], r9, r10
+          cmpnexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10]
@@ -720,53 +720,53 @@
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff]
           cmpsxadd qword ptr [rdx - 1024], r9, r10
 
-// CHECK: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+// CHECK: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
+          cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+// CHECK: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax
+          cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rip], ecx, eax
+// CHECK: cmpexadd dword ptr [rip], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd dword ptr [rip], ecx, eax
+          cmpexadd dword ptr [rip], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+// CHECK: cmpexadd dword ptr [2*rbp - 128], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff]
-          cmpzxadd dword ptr [2*rbp - 128], ecx, eax
+          cmpexadd dword ptr [2*rbp - 128], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rcx + 508], ecx, eax
+// CHECK: cmpexadd dword ptr [rcx + 508], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00]
-          cmpzxadd dword ptr [rcx + 508], ecx, eax
+          cmpexadd dword ptr [rcx + 508], ecx, eax
 
-// CHECK: cmpzxadd dword ptr [rdx - 512], ecx, eax
+// CHECK: cmpexadd dword ptr [rdx - 512], ecx, eax
 // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff]
-          cmpzxadd dword ptr [rdx - 512], ecx, eax
+          cmpexadd dword ptr [rdx - 512], ecx, eax
 
-// CHECK: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+// CHECK: cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10]
-          cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
+          cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10
 
-// CHECK: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+// CHECK: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00]
-          cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10
+          cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rip], r9, r10
+// CHECK: cmpexadd qword ptr [rip], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00]
-          cmpzxadd qword ptr [rip], r9, r10
+          cmpexadd qword ptr [rip], r9, r10
 
-// CHECK: cmpzxadd qword ptr [2*rbp - 256], r9, r10
+// CHECK: cmpexadd qword ptr [2*rbp - 256], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff]
-          cmpzxadd qword ptr [2*rbp - 256], r9, r10
+          cmpexadd qword ptr [2*rbp - 256], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rcx + 1016], r9, r10
+// CHECK: cmpexadd qword ptr [rcx + 1016], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00]
-          cmpzxadd qword ptr [rcx + 1016], r9, r10
+          cmpexadd qword ptr [rcx + 1016], r9, r10
 
-// CHECK: cmpzxadd qword ptr [rdx - 1024], r9, r10
+// CHECK: cmpexadd qword ptr [rdx - 1024], r9, r10
 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff]
-          cmpzxadd qword ptr [rdx - 1024], r9, r10
+          cmpexadd qword ptr [rdx - 1024], r9, r10
 
 // CHECK: cmpbexadd dword ptr [rip], r8d, ecx
 // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00]

From 1e34706232e5f2865ff918ba8e9f840f38cdef07 Mon Sep 17 00:00:00 2001
From: Longsheng Mou <moulongsheng@huawei.com>
Date: Thu, 15 Aug 2024 14:30:00 +0800
Subject: [PATCH 30/47] [mlir][tosa] Add verifier for `tosa.table` (#103708)

This patch adds a verifier to `tosa.table` which fixes a crash. Fix
#103086.
---
 mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td |  2 ++
 mlir/lib/Dialect/Tosa/IR/TosaOps.cpp         | 23 +++++++++++++++++
 mlir/test/Dialect/Tosa/invalid.mlir          | 27 ++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index 7871b46724a03d..0be0f8ef2d7a0c 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -897,6 +897,8 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> {
   let assemblyFormat = [{
     $input `,` $table attr-dict `:` `(` type($input) `,` type($table) `)` `->` type($output)
   }];
+
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 39ea7a5b61f5ec..d4e49b6e3c044c 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -864,6 +864,29 @@ LogicalResult tosa::TableOp::inferReturnTypeComponents(
   return success();
 }
 
+LogicalResult tosa::TableOp::verify() {
+  TensorType inputType = getInput().getType();
+  TensorType outputType = getOutput().getType();
+
+  if (inputType.hasRank() && outputType.hasRank() &&
+      inputType.getRank() != outputType.getRank())
+    return emitOpError()
+           << "expected input tensor rank to equal result tensor rank";
+
+  auto inputDims = inputType.getShape();
+  auto outputDims = outputType.getShape();
+  for (auto it : llvm::enumerate(llvm::zip(inputDims, outputDims))) {
+    int64_t dim = it.index();
+    auto [inputDim, outputDim] = it.value();
+    if (!ShapedType::isDynamic(outputDim) && outputDim != inputDim) {
+      return emitOpError() << "dim(result, " << dim << ") = " << outputDim
+                           << " doesn't match dim(input, " << dim
+                           << ") = " << inputDim;
+    }
+  }
+  return success();
+}
+
 LogicalResult tosa::TileOp::inferReturnTypeComponents(
     MLIRContext *context, ::std::optional<Location> location,
     TileOp::Adaptor adaptor,
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index e1fcf056480083..e723aef3815ce6 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -448,3 +448,30 @@ func.func @test_large_constant_permutation() {
   %3 = tosa.transpose %2, %1 : (tensor<?x27xi64>, tensor<2xi32>) -> tensor<?x27xi64>
   return
 }
+
+// -----
+
+// CHECK-LABEL: test_table_rank0_table
+func.func @test_table_rank0_table(%arg0: tensor<64xi16>, %arg1: tensor<i16>) {
+  // expected-error@+1 {{'tosa.table' op operand #1 must be 1-d tensor, but got 'tensor<i16>'}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor<i16>) -> tensor<64xi16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: test_table_io_rank_mismatch
+func.func @test_table_io_rank_mismatch(%arg0: tensor<64xi16>, %arg1: tensor<6xi16>) {
+  // expected-error@+1 {{'tosa.table' op expected input tensor rank to equal result tensor rank}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor<6xi16>) -> tensor<64x?xi16>
+  return
+}
+
+// -----
+
+// CHECK-LABEL: test_table_io_shape_mismatch
+func.func @test_table_io_shape_mismatch(%arg0: tensor<?x16xi16>, %arg1: tensor<6xi16>) {
+  // expected-error@+1 {{'tosa.table' op dim(result, 1) = 15 doesn't match dim(input, 1) = 16}}
+  %0 = tosa.table %arg0, %arg1 : (tensor<?x16xi16>, tensor<6xi16>) -> tensor<?x15xi16>
+  return
+}

From 3eaf483c296bd95411bc855674707f289790e2a2 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Thu, 15 Aug 2024 08:43:43 +0200
Subject: [PATCH 31/47] [include-cleaner] Remove two commented-out lines of
 code.

---
 clang-tools-extra/include-cleaner/lib/WalkAST.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index f7cc9d19123635..b15d428326ac12 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -271,7 +271,6 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
   // specialized template. Implicit ones are filtered out by RAV.
   bool
   VisitClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *CTSD) {
-    // if (CTSD->isExplicitSpecialization())
     if (clang::isTemplateExplicitInstantiationOrSpecialization(
             CTSD->getTemplateSpecializationKind()))
       report(CTSD->getLocation(),
@@ -279,7 +278,6 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
     return true;
   }
   bool VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *VTSD) {
-    // if (VTSD->isExplicitSpecialization())
     if (clang::isTemplateExplicitInstantiationOrSpecialization(
             VTSD->getTemplateSpecializationKind()))
       report(VTSD->getLocation(),

From 12763a06526f5fee46d8d11953b1188bad9e7b0e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 15 Aug 2024 08:04:22 +0100
Subject: [PATCH 32/47] [VPlan] Move VPWidenStoreRecipe::execute to
 VPlanRecipes.cpp (NFC).

Move VPWidenStoreRecipe::execute to VPlanRecipes.cpp, in line with
other ::execute implementations that don't depend on anything
defined in LoopVectorize.cpp.
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 40 ------------------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 42 +++++++++++++++++++
 2 files changed, 42 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f1bb96a38cfaa9..fdf8f7042c4fb8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9363,46 +9363,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
   State.set(this, Res, 0);
 }
 
-void VPWidenStoreRecipe::execute(VPTransformState &State) {
-  auto *SI = cast<StoreInst>(&Ingredient);
-
-  VPValue *StoredVPValue = getStoredValue();
-  bool CreateScatter = !isConsecutive();
-  const Align Alignment = getLoadStoreAlignment(&Ingredient);
-
-  auto &Builder = State.Builder;
-  State.setDebugLocFrom(getDebugLoc());
-
-  for (unsigned Part = 0; Part < State.UF; ++Part) {
-    Instruction *NewSI = nullptr;
-    Value *Mask = nullptr;
-    if (auto *VPMask = getMask()) {
-      // Mask reversal is only needed for non-all-one (null) masks, as reverse
-      // of a null all-one mask is a null mask.
-      Mask = State.get(VPMask, Part);
-      if (isReverse())
-        Mask = Builder.CreateVectorReverse(Mask, "reverse");
-    }
-
-    Value *StoredVal = State.get(StoredVPValue, Part);
-    if (isReverse()) {
-      // If we store to reverse consecutive memory locations, then we need
-      // to reverse the order of elements in the stored value.
-      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
-      // We don't want to update the value in the map as it might be used in
-      // another expression. So don't call resetVectorValue(StoredVal).
-    }
-    Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter);
-    if (CreateScatter)
-      NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
-    else if (Mask)
-      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
-    else
-      NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
-    State.addMetadata(NewSI, SI);
-  }
-}
-
 void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
   assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
                           "explicit vector length.");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 911b2fe9e9a1eb..bc57ea4d52471e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2066,7 +2066,49 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent,
   O << " = vp.load ";
   printOperands(O, SlotTracker);
 }
+#endif
+
+void VPWidenStoreRecipe::execute(VPTransformState &State) {
+  auto *SI = cast<StoreInst>(&Ingredient);
+
+  VPValue *StoredVPValue = getStoredValue();
+  bool CreateScatter = !isConsecutive();
+  const Align Alignment = getLoadStoreAlignment(&Ingredient);
+
+  auto &Builder = State.Builder;
+  State.setDebugLocFrom(getDebugLoc());
 
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Instruction *NewSI = nullptr;
+    Value *Mask = nullptr;
+    if (auto *VPMask = getMask()) {
+      // Mask reversal is only needed for explicit (non-null) masks: a null
+      // mask stands for all-one, and the reverse of all-one is still null.
+      Mask = State.get(VPMask, Part);
+      if (isReverse())
+        Mask = Builder.CreateVectorReverse(Mask, "reverse");
+    }
+
+    Value *StoredVal = State.get(StoredVPValue, Part);
+    if (isReverse()) {
+      // If we store to reverse consecutive memory locations, then we need
+      // to reverse the order of elements in the stored value.
+      StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
+      // We don't want to update the value in the map as it might be used in
+      // another expression. So don't call resetVectorValue(StoredVal).
+    }
+    Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter);
+    if (CreateScatter)
+      NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask);
+    else if (Mask)
+      NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask);
+    else
+      NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment);
+    State.addMetadata(NewSI, SI);
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN store ";

From fa343be414f9364911b947f109f3df5539e23068 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi <geek4civic@gmail.com>
Date: Thu, 15 Aug 2024 15:56:33 +0900
Subject: [PATCH 33/47] Fix warnings in #102848 [-Wunused-but-set-variable]

---
 clang/lib/AST/MicrosoftMangle.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp
index a113574675b4c5..db8000e25dc7cc 100644
--- a/clang/lib/AST/MicrosoftMangle.cpp
+++ b/clang/lib/AST/MicrosoftMangle.cpp
@@ -2969,6 +2969,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
       if (const auto *AT = ResultType->getContainedAutoType()) {
         assert(AT->getKeyword() == AutoTypeKeyword::Auto &&
                "should only need to mangle auto!");
+        (void)AT;
         Out << '?';
         mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false);
         Out << '?';
@@ -2987,7 +2988,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T,
       // SourceRange Range)` for details.
       auto UseClangMangling = [](QualType ResultType) {
         QualType T = ResultType;
-        while (const auto *PT = dyn_cast<PointerType>(T.getTypePtr())) {
+        while (isa<PointerType>(T.getTypePtr())) {
           T = T->getPointeeType();
           if (T.getQualifiers().hasAddressSpace())
             return true;

From 845431a54fc2befacdfea27a852f003ad61ba720 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88@gmail.com>
Date: Thu, 15 Aug 2024 10:48:27 +0300
Subject: [PATCH 34/47] [UnitTests] Convert some data layout parsing tests to
 GTest (#104346)

For now, the testcases are grouped in a single TEST. I'll sort them out
and add more testcases in follow-up commits.
---
 ...talayout-invalid-function-ptr-alignment.ll |   5 -
 .../datalayout-invalid-i8-alignment.ll        |   5 -
 ...alayout-invalid-stack-natural-alignment.ll |   5 -
 .../invalid-datalayout-alloca-addrspace.ll    |   4 -
 .../invalid-datalayout-globals-addrspace.ll   |   4 -
 .../invalid-datalayout-index-size.ll          |   3 -
 .../invalid-datalayout-program-addrspace.ll   |   4 -
 llvm/test/Assembler/invalid-datalayout1.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout10.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout11.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout12.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout13.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout14.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout15.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout16.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout17.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout18.ll   |   3 -
 llvm/test/Assembler/invalid-datalayout19.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout2.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout20.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout21.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout22.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout23.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout24.ll   |   6 -
 llvm/test/Assembler/invalid-datalayout3.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout4.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout5.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout6.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout7.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout8.ll    |   3 -
 llvm/test/Assembler/invalid-datalayout9.ll    |   3 -
 llvm/unittests/IR/DataLayoutTest.cpp          | 105 ++++++++++++++++++
 32 files changed, 105 insertions(+), 120 deletions(-)
 delete mode 100644 llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll
 delete mode 100644 llvm/test/Assembler/datalayout-invalid-i8-alignment.ll
 delete mode 100644 llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout-index-size.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout-program-addrspace.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout1.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout10.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout11.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout12.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout13.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout14.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout15.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout16.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout17.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout18.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout19.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout2.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout20.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout21.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout22.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout23.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout24.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout3.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout4.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout5.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout6.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout7.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout8.ll
 delete mode 100644 llvm/test/Assembler/invalid-datalayout9.ll

diff --git a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll b/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll
deleted file mode 100644
index 7c1e070c292d18..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Alignment is neither 0 nor a power of 2
-
-target datalayout = "Fi24"
diff --git a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll b/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll
deleted file mode 100644
index e12cfce0309746..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Invalid ABI alignment, i8 must be naturally aligned
-
-target datalayout = "i8:16"
diff --git a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll b/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll
deleted file mode 100644
index 1ccfb7832a50cc..00000000000000
--- a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: not llvm-as %s 2>&1 | FileCheck %s
-
-; CHECK: error: Alignment is neither 0 nor a power of 2
-
-target datalayout = "S24"
diff --git a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll
deleted file mode 100644
index f0407da73e4fc2..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "A16777216"
-; CHECK: Invalid address space, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll
deleted file mode 100644
index 19bf77db329d2c..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-; CHECK: Invalid address space, must be a 24-bit integer
-target datalayout = "G16777216"
diff --git a/llvm/test/Assembler/invalid-datalayout-index-size.ll b/llvm/test/Assembler/invalid-datalayout-index-size.ll
deleted file mode 100644
index dc608cdd56a040..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-index-size.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:64:64:64:128"
-; CHECK: Index width cannot be larger than pointer width
diff --git a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll
deleted file mode 100644
index e636b75dee4d04..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-; CHECK: Invalid address space, must be a 24-bit integer
-target datalayout = "P16777216"
diff --git a/llvm/test/Assembler/invalid-datalayout1.ll b/llvm/test/Assembler/invalid-datalayout1.ll
deleted file mode 100644
index d1befdcdf294d5..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout1.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "^"
-; CHECK: Unknown specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout10.ll b/llvm/test/Assembler/invalid-datalayout10.ll
deleted file mode 100644
index 9f19688f852b4a..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout10.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m"
-; CHECK: Expected mangling specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout11.ll b/llvm/test/Assembler/invalid-datalayout11.ll
deleted file mode 100644
index f8fed8ff9ff339..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout11.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m."
-; CHECK: Unexpected trailing characters after mangling specifier in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout12.ll b/llvm/test/Assembler/invalid-datalayout12.ll
deleted file mode 100644
index d79c196baab16f..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout12.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "f"
-; CHECK: Missing alignment specification in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout13.ll b/llvm/test/Assembler/invalid-datalayout13.ll
deleted file mode 100644
index 5ac719dbb7a9c0..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout13.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = ":32"
-; CHECK: Expected token before separator in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout14.ll b/llvm/test/Assembler/invalid-datalayout14.ll
deleted file mode 100644
index 84634b52a146ca..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout14.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:64:16"
-; CHECK: Preferred alignment cannot be less than the ABI alignment
diff --git a/llvm/test/Assembler/invalid-datalayout15.ll b/llvm/test/Assembler/invalid-datalayout15.ll
deleted file mode 100644
index ea240b73fd25f2..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout15.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:16:16777216"
-; CHECK: Invalid preferred alignment, must be a 16bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout16.ll b/llvm/test/Assembler/invalid-datalayout16.ll
deleted file mode 100644
index 0dd1abb629b6fc..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout16.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i64:16777216:16777216"
-; CHECK: Invalid ABI alignment, must be a 16bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout17.ll b/llvm/test/Assembler/invalid-datalayout17.ll
deleted file mode 100644
index b7eab74ad2a8ca..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout17.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "i16777216:16:16"
-; CHECK: Invalid bit width, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout18.ll b/llvm/test/Assembler/invalid-datalayout18.ll
deleted file mode 100644
index b9956f98c9c6dc..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout18.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:32:32:16"
-; CHECK: Preferred alignment cannot be less than the ABI alignment
diff --git a/llvm/test/Assembler/invalid-datalayout19.ll b/llvm/test/Assembler/invalid-datalayout19.ll
deleted file mode 100644
index fc0fc468520928..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout19.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:0:32:32"
-
-; CHECK: Invalid pointer size of 0 bytes
-
diff --git a/llvm/test/Assembler/invalid-datalayout2.ll b/llvm/test/Assembler/invalid-datalayout2.ll
deleted file mode 100644
index a435612bf85459..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout2.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "m:v"
-; CHECK: Unknown mangling in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout20.ll b/llvm/test/Assembler/invalid-datalayout20.ll
deleted file mode 100644
index a9ac1d7fe0983a..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout20.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:64:24:64"
-
-; CHECK: Pointer ABI alignment must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout21.ll b/llvm/test/Assembler/invalid-datalayout21.ll
deleted file mode 100644
index a39d1d7a14a86b..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout21.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "p:64:64:24"
-
-; CHECK: Pointer preferred alignment must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout22.ll b/llvm/test/Assembler/invalid-datalayout22.ll
deleted file mode 100644
index 14e4c2822ce4b0..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout22.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "v128:0:128"
-
-; CHECK: ABI alignment specification must be >0 for non-aggregate types
-
diff --git a/llvm/test/Assembler/invalid-datalayout23.ll b/llvm/test/Assembler/invalid-datalayout23.ll
deleted file mode 100644
index 430326327bc116..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout23.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "i32:24:32"
-
-; CHECK: Invalid ABI alignment, must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout24.ll b/llvm/test/Assembler/invalid-datalayout24.ll
deleted file mode 100644
index 616ec64518a5b9..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout24.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-
-target datalayout = "i32:32:24"
-
-; CHECK: Invalid preferred alignment, must be a power of 2
-
diff --git a/llvm/test/Assembler/invalid-datalayout3.ll b/llvm/test/Assembler/invalid-datalayout3.ll
deleted file mode 100644
index 44535fd055b5ea..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout3.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "n0"
-; CHECK: Zero width native integer type in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout4.ll b/llvm/test/Assembler/invalid-datalayout4.ll
deleted file mode 100644
index 99a6a6093954e1..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout4.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p16777216:64:64:64"
-; CHECK: Invalid address space, must be a 24-bit integer
diff --git a/llvm/test/Assembler/invalid-datalayout5.ll b/llvm/test/Assembler/invalid-datalayout5.ll
deleted file mode 100644
index 3ce8791c0870b4..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout5.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "a1:64"
-; CHECK: Sized aggregate specification in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout6.ll b/llvm/test/Assembler/invalid-datalayout6.ll
deleted file mode 100644
index 425099f7cad869..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout6.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "a:"
-; CHECK: Trailing separator in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout7.ll b/llvm/test/Assembler/invalid-datalayout7.ll
deleted file mode 100644
index 5e010710889f6d..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout7.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "p:48:52"
-; CHECK: number of bits must be a byte width multiple
diff --git a/llvm/test/Assembler/invalid-datalayout8.ll b/llvm/test/Assembler/invalid-datalayout8.ll
deleted file mode 100644
index 28832ffb17dd05..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout8.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "e-p"
-; CHECK: Missing size specification for pointer in datalayout string
diff --git a/llvm/test/Assembler/invalid-datalayout9.ll b/llvm/test/Assembler/invalid-datalayout9.ll
deleted file mode 100644
index dfeac65cf604d1..00000000000000
--- a/llvm/test/Assembler/invalid-datalayout9.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: not llvm-as < %s 2>&1 | FileCheck %s
-target datalayout = "e-p:64"
-; CHECK: Missing alignment specification for pointer in datalayout string
diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp
index 113bb578f6bc3b..dcb2e614f4c40d 100644
--- a/llvm/unittests/IR/DataLayoutTest.cpp
+++ b/llvm/unittests/IR/DataLayoutTest.cpp
@@ -19,6 +19,111 @@ using namespace llvm;
 
 namespace {
 
+// TODO: Split into multiple TESTs.
+TEST(DataLayoutTest, ParseErrors) {
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("^"),
+      FailedWithMessage("Unknown specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m:v"),
+      FailedWithMessage("Unknown mangling in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("n0"),
+      FailedWithMessage("Zero width native integer type in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p16777216:64:64:64"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("a1:64"),
+      FailedWithMessage("Sized aggregate specification in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("a:"),
+      FailedWithMessage("Trailing separator in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:48:52"),
+      FailedWithMessage("number of bits must be a byte width multiple"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("e-p"),
+      FailedWithMessage(
+          "Missing size specification for pointer in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("e-p:64"),
+      FailedWithMessage(
+          "Missing alignment specification for pointer in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m"),
+      FailedWithMessage("Expected mangling specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("m."),
+      FailedWithMessage("Unexpected trailing characters after mangling "
+                        "specifier in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("f"),
+      FailedWithMessage(
+          "Missing alignment specification in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse(":32"),
+      FailedWithMessage(
+          "Expected token before separator in datalayout string"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:64:16"),
+      FailedWithMessage(
+          "Preferred alignment cannot be less than the ABI alignment"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:16:16777216"),
+      FailedWithMessage(
+          "Invalid preferred alignment, must be a 16bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i64:16777216:16777216"),
+      FailedWithMessage("Invalid ABI alignment, must be a 16bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i16777216:16:16"),
+      FailedWithMessage("Invalid bit width, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:32:32:16"),
+      FailedWithMessage(
+          "Preferred alignment cannot be less than the ABI alignment"));
+  EXPECT_THAT_EXPECTED(DataLayout::parse("p:0:32:32"),
+                       FailedWithMessage("Invalid pointer size of 0 bytes"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:24:64"),
+      FailedWithMessage("Pointer ABI alignment must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:64:24"),
+      FailedWithMessage("Pointer preferred alignment must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("p:64:64:64:128"),
+      FailedWithMessage("Index width cannot be larger than pointer width"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("v128:0:128"),
+      FailedWithMessage(
+          "ABI alignment specification must be >0 for non-aggregate types"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i32:24:32"),
+      FailedWithMessage("Invalid ABI alignment, must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i32:32:24"),
+      FailedWithMessage("Invalid preferred alignment, must be a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("A16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("G16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("P16777216"),
+      FailedWithMessage("Invalid address space, must be a 24-bit integer"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("Fi24"),
+      FailedWithMessage("Alignment is neither 0 nor a power of 2"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("i8:16"),
+      FailedWithMessage("Invalid ABI alignment, i8 must be naturally aligned"));
+  EXPECT_THAT_EXPECTED(
+      DataLayout::parse("S24"),
+      FailedWithMessage("Alignment is neither 0 nor a power of 2"));
+}
+
 TEST(DataLayoutTest, CopyAssignmentInvalidatesStructLayout) {
   DataLayout DL1 = cantFail(DataLayout::parse("p:32:32"));
   DataLayout DL2 = cantFail(DataLayout::parse("p:64:64"));

From 4a00f1aab25353ca51b5d8e2b081cc66305b3cd8 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Thu, 15 Aug 2024 10:02:41 +0200
Subject: [PATCH 35/47] [mlir][test] XFAIL little-endian-only tests on SPARC
 (#103726)

3 MLIR tests `FAIL` on SPARC, both Solaris/sparcv9 and Linux/sparc64:
```
  MLIR :: Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
  MLIR :: IR/elements-attr-interface.mlir
  MLIR :: Target/LLVMIR/llvmir-le-specific.mlir
```
The issue is always the same: the tests in question are
little-endian-only currently, so this patch `XFAIL`s them on `sparc*` as
is already done for `s390x`.

Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`,
`amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`.
---
 .../Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir   | 2 +-
 mlir/test/IR/elements-attr-interface.mlir                     | 2 +-
 mlir/test/Target/LLVMIR/llvmir-le-specific.mlir               | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
index 7233a8bfffa9db..47be1be30577d8 100644
--- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
+++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir
@@ -10,7 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 module attributes {
   spirv.target_env = #spirv.target_env<
diff --git a/mlir/test/IR/elements-attr-interface.mlir b/mlir/test/IR/elements-attr-interface.mlir
index 5234c81bd841e3..79283f1aae99a8 100644
--- a/mlir/test/IR/elements-attr-interface.mlir
+++ b/mlir/test/IR/elements-attr-interface.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-opt %s -test-elements-attr-interface -verify-diagnostics
 
 // Parsing external resources does not work on big-endian platforms currently
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 // This test contains various `ElementsAttr` attributes, and tests the support
 // for iterating the values of these attributes using various native C++ types.
diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
index f8d082082117cb..98145bc35cba77 100644
--- a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir
@@ -1,7 +1,7 @@
 // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
 
 // Decoding the attribute does not work on big-endian platforms currently
-// XFAIL: target=s390x-{{.*}}
+// XFAIL: target={{(s390x|sparc.*)-.*}}
 
 // CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000]
 llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource<dense_resource_test_5xf32> : tensor<5xf32>) : !llvm.array<5 x f32>
@@ -24,4 +24,4 @@ llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dens
       dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E"
     }
   }
-#-}
\ No newline at end of file
+#-}

From cf2e10150a5a83cece4fb8935202f0d67307b5c8 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Thu, 15 Aug 2024 10:04:49 +0200
Subject: [PATCH 36/47] [flang][test] Fix
 Lower/default-initialization-globals.f90 on SPARC (#103722)

`Flang :: Lower/default-initialization-globals.f90` `FAIL`s on SPARC,
both Solaris/sparcv9 and Linux/sparc64.

The failure mode is the same as on AIX/PowerPC. Since both targets are
big-endian, this patch treats them the same.

Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`,
`amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`.
---
 flang/test/Lower/default-initialization-globals.f90 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/test/Lower/default-initialization-globals.f90 b/flang/test/Lower/default-initialization-globals.f90
index 384d1cb763ad67..e9611dab467cba 100644
--- a/flang/test/Lower/default-initialization-globals.f90
+++ b/flang/test/Lower/default-initialization-globals.f90
@@ -1,5 +1,5 @@
 ! Test default initialization of global variables (static init)
-! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if system-aix %{"CHECK","CHECK-BE"%} \
+! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if target={{.*-aix.*|sparc.*}} %{"CHECK","CHECK-BE"%} \
 ! RUN:                                         %else %{"CHECK","CHECK-LE"%}
 
 module tinit

From e1e47acafb81e583e5cf7b3b6d609f4b5726cc67 Mon Sep 17 00:00:00 2001
From: Sergei Barannikov <barannikov88@gmail.com>
Date: Thu, 15 Aug 2024 11:06:47 +0300
Subject: [PATCH 37/47] [DataLayout] Move '*AlignElem' structs and enum inside
 DataLayout (NFC) (#103723)

This makes `LayoutAlignElem` / `PointerAlignElem` and `AlignTypeEnum`
inner types of `DataLayout`. The types are also renamed to
`PrimitiveSpec` / `PointerSpec` and `TypeSpecifier` to match their
meaning (LangRef refers to them as "specification" and "specifier").

Pull Request: https://github.com/llvm/llvm-project/pull/103723
---
 llvm/include/llvm/IR/DataLayout.h | 107 ++++++--------
 llvm/lib/IR/DataLayout.cpp        | 235 +++++++++++++-----------------
 2 files changed, 152 insertions(+), 190 deletions(-)

diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index 795cd05ea5b5e2..1185939cd9c75b 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -49,51 +49,11 @@ class StructLayout;
 class Triple;
 class Value;
 
-/// Enum used to categorize the alignment types stored by LayoutAlignElem
-enum AlignTypeEnum {
-  INTEGER_ALIGN = 'i',
-  VECTOR_ALIGN = 'v',
-  FLOAT_ALIGN = 'f',
-  AGGREGATE_ALIGN = 'a'
-};
-
 // FIXME: Currently the DataLayout string carries a "preferred alignment"
 // for types. As the DataLayout is module/global, this should likely be
 // sunk down to an FTTI element that is queried rather than a global
 // preference.
 
-/// Layout alignment element.
-///
-/// Stores the alignment data associated with a given type bit width.
-struct LayoutAlignElem {
-  uint32_t TypeBitWidth;
-  Align ABIAlign;
-  Align PrefAlign;
-
-  static LayoutAlignElem get(Align ABIAlign, Align PrefAlign,
-                             uint32_t BitWidth);
-
-  bool operator==(const LayoutAlignElem &rhs) const;
-};
-
-/// Layout pointer alignment element.
-///
-/// Stores the alignment data associated with a given pointer and address space.
-struct PointerAlignElem {
-  uint32_t AddressSpace;
-  uint32_t TypeBitWidth;
-  Align ABIAlign;
-  Align PrefAlign;
-  uint32_t IndexBitWidth;
-
-  /// Initializer
-  static PointerAlignElem getInBits(uint32_t AddressSpace, Align ABIAlign,
-                                    Align PrefAlign, uint32_t TypeBitWidth,
-                                    uint32_t IndexBitWidth);
-
-  bool operator==(const PointerAlignElem &rhs) const;
-};
-
 /// A parsed version of the target data layout string in and methods for
 /// querying it.
 ///
@@ -102,6 +62,26 @@ struct PointerAlignElem {
 /// target being codegen'd to.
 class DataLayout {
 public:
+  /// Primitive type specification.
+  struct PrimitiveSpec {
+    uint32_t BitWidth;
+    Align ABIAlign;
+    Align PrefAlign;
+
+    bool operator==(const PrimitiveSpec &Other) const;
+  };
+
+  /// Pointer type specification.
+  struct PointerSpec {
+    uint32_t AddrSpace;
+    uint32_t BitWidth;
+    Align ABIAlign;
+    Align PrefAlign;
+    uint32_t IndexBitWidth;
+
+    bool operator==(const PointerSpec &Other) const;
+  };
+
   enum class FunctionPtrAlignType {
     /// The function pointer alignment is independent of the function alignment.
     Independent,
@@ -135,20 +115,26 @@ class DataLayout {
   // FIXME: `unsigned char` truncates the value parsed by `parseSpecifier`.
   SmallVector<unsigned char, 8> LegalIntWidths;
 
-  // Primitive type specifications. Sorted and uniqued by type bit width.
-  SmallVector<LayoutAlignElem, 6> IntAlignments;
-  SmallVector<LayoutAlignElem, 4> FloatAlignments;
-  SmallVector<LayoutAlignElem, 10> VectorAlignments;
+  /// Type specifier used by some internal functions.
+  enum class TypeSpecifier {
+    Integer = 'i',
+    Float = 'f',
+    Vector = 'v',
+    Aggregate = 'a'
+  };
 
-  // Pointer type specifications. Sorted and uniqued by address space number.
-  SmallVector<PointerAlignElem, 8> Pointers;
+  /// Primitive type specifications. Sorted and uniqued by type bit width.
+  SmallVector<PrimitiveSpec, 6> IntSpecs;
+  SmallVector<PrimitiveSpec, 4> FloatSpecs;
+  SmallVector<PrimitiveSpec, 10> VectorSpecs;
+
+  /// Pointer type specifications. Sorted and uniqued by address space number.
+  SmallVector<PointerSpec, 8> PointerSpecs;
 
   /// The string representation used to create this DataLayout
   std::string StringRepresentation;
 
-  const PointerAlignElem &getPointerAlignElem(uint32_t AddressSpace) const;
-
-  // Struct type ABI and preferred alignments. The default spec is "a:8:64".
+  /// Struct type ABI and preferred alignments. The default spec is "a:8:64".
   Align StructABIAlignment = Align::Constant<1>();
   Align StructPrefAlignment = Align::Constant<8>();
 
@@ -159,16 +145,19 @@ class DataLayout {
   /// well-defined bitwise representation.
   SmallVector<unsigned, 8> NonIntegralAddressSpaces;
 
-  /// Attempts to set the alignment of the given type. Returns an error
-  /// description on failure.
-  Error setAlignment(AlignTypeEnum AlignType, Align ABIAlign, Align PrefAlign,
-                     uint32_t BitWidth);
+  /// Attempts to set the specification for the given type.
+  /// Returns an error description on failure.
+  Error setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth,
+                         Align ABIAlign, Align PrefAlign);
+
+  /// Searches for a pointer specification that matches the given address space.
+  /// Returns the default address space specification if not found.
+  const PointerSpec &getPointerSpec(uint32_t AddrSpace) const;
 
-  /// Attempts to set the alignment of a pointer in the given address space.
+  /// Attempts to set the specification for pointers in the given address space.
   /// Returns an error description on failure.
-  Error setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
-                                  Align PrefAlign, uint32_t TypeBitWidth,
-                                  uint32_t IndexBitWidth);
+  Error setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign,
+                       Align PrefAlign, uint32_t IndexBitWidth);
 
   /// Internal helper to get alignment for integer of given bitwidth.
   Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const;
@@ -375,7 +364,7 @@ class DataLayout {
   /// FIXME: The defaults need to be removed once all of
   /// the backends/clients are updated.
   unsigned getPointerSizeInBits(unsigned AS = 0) const {
-    return getPointerAlignElem(AS).TypeBitWidth;
+    return getPointerSpec(AS).BitWidth;
   }
 
   /// Returns the maximum index size over all address spaces.
@@ -385,7 +374,7 @@ class DataLayout {
 
   /// Size in bits of index used for address calculation in getelementptr.
   unsigned getIndexSizeInBits(unsigned AS) const {
-    return getPointerAlignElem(AS).IndexBitWidth;
+    return getPointerSpec(AS).IndexBitWidth;
   }
 
   /// Layout pointer size, in bits, based on the type.  If this function is
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 530979c75063b4..44cd1e69818953 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -139,53 +139,20 @@ class StructLayoutMap {
 } // end anonymous namespace
 
 //===----------------------------------------------------------------------===//
-// LayoutAlignElem, LayoutAlign support
-//===----------------------------------------------------------------------===//
-
-LayoutAlignElem LayoutAlignElem::get(Align ABIAlign, Align PrefAlign,
-                                     uint32_t BitWidth) {
-  assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
-  LayoutAlignElem retval;
-  retval.ABIAlign = ABIAlign;
-  retval.PrefAlign = PrefAlign;
-  retval.TypeBitWidth = BitWidth;
-  return retval;
-}
-
-bool LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
-  return ABIAlign == rhs.ABIAlign && PrefAlign == rhs.PrefAlign &&
-         TypeBitWidth == rhs.TypeBitWidth;
-}
-
-//===----------------------------------------------------------------------===//
-// PointerAlignElem, PointerAlign support
+//                       DataLayout Class Implementation
 //===----------------------------------------------------------------------===//
 
-PointerAlignElem PointerAlignElem::getInBits(uint32_t AddressSpace,
-                                             Align ABIAlign, Align PrefAlign,
-                                             uint32_t TypeBitWidth,
-                                             uint32_t IndexBitWidth) {
-  assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
-  PointerAlignElem retval;
-  retval.AddressSpace = AddressSpace;
-  retval.ABIAlign = ABIAlign;
-  retval.PrefAlign = PrefAlign;
-  retval.TypeBitWidth = TypeBitWidth;
-  retval.IndexBitWidth = IndexBitWidth;
-  return retval;
+bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const {
+  return BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign &&
+         PrefAlign == Other.PrefAlign;
 }
 
-bool
-PointerAlignElem::operator==(const PointerAlignElem &rhs) const {
-  return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace &&
-          PrefAlign == rhs.PrefAlign && TypeBitWidth == rhs.TypeBitWidth &&
-          IndexBitWidth == rhs.IndexBitWidth);
+bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const {
+  return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth &&
+         ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign &&
+         IndexBitWidth == Other.IndexBitWidth;
 }
 
-//===----------------------------------------------------------------------===//
-//                       DataLayout Class Implementation
-//===----------------------------------------------------------------------===//
-
 const char *DataLayout::getManglingComponent(const Triple &T) {
   if (T.isOSBinFormatGOFF())
     return "-m:l";
@@ -200,34 +167,34 @@ const char *DataLayout::getManglingComponent(const Triple &T) {
 
 // Default primitive type specifications.
 // NOTE: These arrays must be sorted by type bit width.
-constexpr LayoutAlignElem DefaultIntSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultIntSpecs[] = {
     {1, Align::Constant<1>(), Align::Constant<1>()},  // i1:8:8
     {8, Align::Constant<1>(), Align::Constant<1>()},  // i8:8:8
     {16, Align::Constant<2>(), Align::Constant<2>()}, // i16:16:16
     {32, Align::Constant<4>(), Align::Constant<4>()}, // i32:32:32
     {64, Align::Constant<4>(), Align::Constant<8>()}, // i64:32:64
 };
-constexpr LayoutAlignElem DefaultFloatSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultFloatSpecs[] = {
     {16, Align::Constant<2>(), Align::Constant<2>()},    // f16:16:16
     {32, Align::Constant<4>(), Align::Constant<4>()},    // f32:32:32
     {64, Align::Constant<8>(), Align::Constant<8>()},    // f64:64:64
     {128, Align::Constant<16>(), Align::Constant<16>()}, // f128:128:128
 };
-constexpr LayoutAlignElem DefaultVectorSpecs[] = {
+constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = {
     {64, Align::Constant<8>(), Align::Constant<8>()},    // v64:64:64
     {128, Align::Constant<16>(), Align::Constant<16>()}, // v128:128:128
 };
 
 // Default pointer type specifications.
-constexpr PointerAlignElem DefaultPointerSpecs[] = {
+constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = {
     {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64
 };
 
 DataLayout::DataLayout()
-    : IntAlignments(ArrayRef(DefaultIntSpecs)),
-      FloatAlignments(ArrayRef(DefaultFloatSpecs)),
-      VectorAlignments(ArrayRef(DefaultVectorSpecs)),
-      Pointers(ArrayRef(DefaultPointerSpecs)) {}
+    : IntSpecs(ArrayRef(DefaultIntSpecs)),
+      FloatSpecs(ArrayRef(DefaultFloatSpecs)),
+      VectorSpecs(ArrayRef(DefaultVectorSpecs)),
+      PointerSpecs(ArrayRef(DefaultPointerSpecs)) {}
 
 DataLayout::DataLayout(StringRef LayoutString) : DataLayout() {
   if (Error Err = parseSpecifier(LayoutString))
@@ -247,10 +214,10 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) {
   TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType;
   ManglingMode = Other.ManglingMode;
   LegalIntWidths = Other.LegalIntWidths;
-  IntAlignments = Other.IntAlignments;
-  FloatAlignments = Other.FloatAlignments;
-  VectorAlignments = Other.VectorAlignments;
-  Pointers = Other.Pointers;
+  IntSpecs = Other.IntSpecs;
+  FloatSpecs = Other.FloatSpecs;
+  VectorSpecs = Other.VectorSpecs;
+  PointerSpecs = Other.PointerSpecs;
   StructABIAlignment = Other.StructABIAlignment;
   StructPrefAlignment = Other.StructPrefAlignment;
   NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces;
@@ -268,11 +235,9 @@ bool DataLayout::operator==(const DataLayout &Other) const {
          FunctionPtrAlign == Other.FunctionPtrAlign &&
          TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
          ManglingMode == Other.ManglingMode &&
-         LegalIntWidths == Other.LegalIntWidths &&
-         IntAlignments == Other.IntAlignments &&
-         FloatAlignments == Other.FloatAlignments &&
-         VectorAlignments == Other.VectorAlignments &&
-         Pointers == Other.Pointers &&
+         LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs &&
+         FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs &&
+         PointerSpecs == Other.PointerSpecs &&
          StructABIAlignment == Other.StructABIAlignment &&
          StructPrefAlignment == Other.StructPrefAlignment;
 }
@@ -361,10 +326,10 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       continue;
     }
 
-    char Specifier = Tok.front();
+    char SpecifierChar = Tok.front();
     Tok = Tok.substr(1);
 
-    switch (Specifier) {
+    switch (SpecifierChar) {
     case 's':
       // Deprecated, but ignoring here to preserve loading older textual llvm
       // ASM file
@@ -433,9 +398,9 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
             return reportError("Invalid index size of 0 bytes");
         }
       }
-      if (Error Err = setPointerAlignmentInBits(
-              AddrSpace, assumeAligned(PointerABIAlign),
-              assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize))
+      if (Error Err = setPointerSpec(
+              AddrSpace, PointerMemSize, assumeAligned(PointerABIAlign),
+              assumeAligned(PointerPrefAlign), IndexSize))
         return Err;
       break;
     }
@@ -443,13 +408,22 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
     case 'v':
     case 'f':
     case 'a': {
-      AlignTypeEnum AlignType;
-      switch (Specifier) {
-      default: llvm_unreachable("Unexpected specifier!");
-      case 'i': AlignType = INTEGER_ALIGN; break;
-      case 'v': AlignType = VECTOR_ALIGN; break;
-      case 'f': AlignType = FLOAT_ALIGN; break;
-      case 'a': AlignType = AGGREGATE_ALIGN; break;
+      TypeSpecifier Specifier;
+      switch (SpecifierChar) {
+      default:
+        llvm_unreachable("Unexpected specifier!");
+      case 'i':
+        Specifier = TypeSpecifier::Integer;
+        break;
+      case 'v':
+        Specifier = TypeSpecifier::Vector;
+        break;
+      case 'f':
+        Specifier = TypeSpecifier::Float;
+        break;
+      case 'a':
+        Specifier = TypeSpecifier::Aggregate;
+        break;
       }
 
       // Bit size.
@@ -458,7 +432,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
         if (Error Err = getInt(Tok, Size))
           return Err;
 
-      if (AlignType == AGGREGATE_ALIGN && Size != 0)
+      if (Specifier == TypeSpecifier::Aggregate && Size != 0)
         return reportError(
             "Sized aggregate specification in datalayout string");
 
@@ -471,7 +445,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       unsigned ABIAlign;
       if (Error Err = getIntInBytes(Tok, ABIAlign))
         return Err;
-      if (AlignType != AGGREGATE_ALIGN && !ABIAlign)
+      if (Specifier != TypeSpecifier::Aggregate && !ABIAlign)
         return reportError(
             "ABI alignment specification must be >0 for non-aggregate types");
 
@@ -479,7 +453,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
         return reportError("Invalid ABI alignment, must be a 16bit integer");
       if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign))
         return reportError("Invalid ABI alignment, must be a power of 2");
-      if (AlignType == INTEGER_ALIGN && Size == 8 && ABIAlign != 1)
+      if (Specifier == TypeSpecifier::Integer && Size == 8 && ABIAlign != 1)
         return reportError(
             "Invalid ABI alignment, i8 must be naturally aligned");
 
@@ -498,8 +472,8 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
       if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign))
         return reportError("Invalid preferred alignment, must be a power of 2");
 
-      if (Error Err = setAlignment(AlignType, assumeAligned(ABIAlign),
-                                   assumeAligned(PrefAlign), Size))
+      if (Error Err = setPrimitiveSpec(Specifier, Size, assumeAligned(ABIAlign),
+                                       assumeAligned(PrefAlign)))
         return Err;
 
       break;
@@ -607,16 +581,17 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
   return Error::success();
 }
 
-static SmallVectorImpl<LayoutAlignElem>::const_iterator
-findAlignmentLowerBound(const SmallVectorImpl<LayoutAlignElem> &Alignments,
-                        uint32_t BitWidth) {
-  return partition_point(Alignments, [BitWidth](const LayoutAlignElem &E) {
-    return E.TypeBitWidth < BitWidth;
+static SmallVectorImpl<DataLayout::PrimitiveSpec>::const_iterator
+findPrimitiveSpecLowerBound(
+    const SmallVectorImpl<DataLayout::PrimitiveSpec> &Specs,
+    uint32_t BitWidth) {
+  return partition_point(Specs, [BitWidth](const DataLayout::PrimitiveSpec &E) {
+    return E.BitWidth < BitWidth;
   });
 }
 
-Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign,
-                               Align PrefAlign, uint32_t BitWidth) {
+Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth,
+                                   Align ABIAlign, Align PrefAlign) {
   // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as
   // uint16_t, it is unclear if there are requirements for alignment to be less
   // than 2^16 other than storage. In the meantime we leave the restriction as
@@ -628,74 +603,72 @@ Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign,
     return reportError(
         "Preferred alignment cannot be less than the ABI alignment");
 
-  SmallVectorImpl<LayoutAlignElem> *Alignments;
-  switch (AlignType) {
-  case AGGREGATE_ALIGN:
+  SmallVectorImpl<PrimitiveSpec> *Specs;
+  switch (Specifier) {
+  case TypeSpecifier::Aggregate:
     StructABIAlignment = ABIAlign;
     StructPrefAlignment = PrefAlign;
     return Error::success();
-  case INTEGER_ALIGN:
-    Alignments = &IntAlignments;
+  case TypeSpecifier::Integer:
+    Specs = &IntSpecs;
     break;
-  case FLOAT_ALIGN:
-    Alignments = &FloatAlignments;
+  case TypeSpecifier::Float:
+    Specs = &FloatSpecs;
     break;
-  case VECTOR_ALIGN:
-    Alignments = &VectorAlignments;
+  case TypeSpecifier::Vector:
+    Specs = &VectorSpecs;
     break;
   }
 
-  auto I = partition_point(*Alignments, [BitWidth](const LayoutAlignElem &E) {
-    return E.TypeBitWidth < BitWidth;
+  auto I = partition_point(*Specs, [BitWidth](const PrimitiveSpec &E) {
+    return E.BitWidth < BitWidth;
   });
-  if (I != Alignments->end() && I->TypeBitWidth == BitWidth) {
+  if (I != Specs->end() && I->BitWidth == BitWidth) {
     // Update the abi, preferred alignments.
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
   } else {
     // Insert before I to keep the vector sorted.
-    Alignments->insert(I, LayoutAlignElem::get(ABIAlign, PrefAlign, BitWidth));
+    Specs->insert(I, PrimitiveSpec{BitWidth, ABIAlign, PrefAlign});
   }
   return Error::success();
 }
 
-const PointerAlignElem &
-DataLayout::getPointerAlignElem(uint32_t AddressSpace) const {
-  if (AddressSpace != 0) {
-    auto I = lower_bound(Pointers, AddressSpace,
-                         [](const PointerAlignElem &A, uint32_t AddressSpace) {
-      return A.AddressSpace < AddressSpace;
-    });
-    if (I != Pointers.end() && I->AddressSpace == AddressSpace)
+const DataLayout::PointerSpec &
+DataLayout::getPointerSpec(uint32_t AddrSpace) const {
+  if (AddrSpace != 0) {
+    auto I = lower_bound(PointerSpecs, AddrSpace,
+                         [](const PointerSpec &Spec, uint32_t AddrSpace) {
+                           return Spec.AddrSpace < AddrSpace;
+                         });
+    if (I != PointerSpecs.end() && I->AddrSpace == AddrSpace)
       return *I;
   }
 
-  assert(Pointers[0].AddressSpace == 0);
-  return Pointers[0];
+  assert(PointerSpecs[0].AddrSpace == 0);
+  return PointerSpecs[0];
 }
 
-Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
-                                            Align PrefAlign,
-                                            uint32_t TypeBitWidth,
-                                            uint32_t IndexBitWidth) {
+Error DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth,
+                                 Align ABIAlign, Align PrefAlign,
+                                 uint32_t IndexBitWidth) {
   if (PrefAlign < ABIAlign)
     return reportError(
         "Preferred alignment cannot be less than the ABI alignment");
-  if (IndexBitWidth > TypeBitWidth)
+  if (IndexBitWidth > BitWidth)
     return reportError("Index width cannot be larger than pointer width");
 
-  auto I = lower_bound(Pointers, AddrSpace,
-                       [](const PointerAlignElem &A, uint32_t AddressSpace) {
-    return A.AddressSpace < AddressSpace;
-  });
-  if (I == Pointers.end() || I->AddressSpace != AddrSpace) {
-    Pointers.insert(I,
-                    PointerAlignElem::getInBits(AddrSpace, ABIAlign, PrefAlign,
-                                                TypeBitWidth, IndexBitWidth));
+  auto I = lower_bound(PointerSpecs, AddrSpace,
+                       [](const PointerSpec &A, uint32_t AddrSpace) {
+                         return A.AddrSpace < AddrSpace;
+                       });
+  if (I == PointerSpecs.end() || I->AddrSpace != AddrSpace) {
+    PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign,
+                                       IndexBitWidth});
   } else {
+    I->BitWidth = BitWidth;
     I->ABIAlign = ABIAlign;
     I->PrefAlign = PrefAlign;
-    I->TypeBitWidth = TypeBitWidth;
     I->IndexBitWidth = IndexBitWidth;
   }
   return Error::success();
@@ -703,11 +676,11 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
 
 Align DataLayout::getIntegerAlignment(uint32_t BitWidth,
                                       bool abi_or_pref) const {
-  auto I = findAlignmentLowerBound(IntAlignments, BitWidth);
+  auto I = findPrimitiveSpecLowerBound(IntSpecs, BitWidth);
   // If we don't have an exact match, use alignment of next larger integer
   // type. If there is none, use alignment of largest integer type by going
   // back one element.
-  if (I == IntAlignments.end())
+  if (I == IntSpecs.end())
     --I;
   return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 }
@@ -737,22 +710,22 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
 }
 
 Align DataLayout::getPointerABIAlignment(unsigned AS) const {
-  return getPointerAlignElem(AS).ABIAlign;
+  return getPointerSpec(AS).ABIAlign;
 }
 
 Align DataLayout::getPointerPrefAlignment(unsigned AS) const {
-  return getPointerAlignElem(AS).PrefAlign;
+  return getPointerSpec(AS).PrefAlign;
 }
 
 unsigned DataLayout::getPointerSize(unsigned AS) const {
-  return divideCeil(getPointerAlignElem(AS).TypeBitWidth, 8);
+  return divideCeil(getPointerSpec(AS).BitWidth, 8);
 }
 
 unsigned DataLayout::getMaxIndexSize() const {
   unsigned MaxIndexSize = 0;
-  for (auto &P : Pointers)
+  for (const PointerSpec &Spec : PointerSpecs)
     MaxIndexSize =
-        std::max(MaxIndexSize, (unsigned)divideCeil(P.TypeBitWidth, 8));
+        std::max(MaxIndexSize, (unsigned)divideCeil(Spec.BitWidth, 8));
 
   return MaxIndexSize;
 }
@@ -765,7 +738,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const {
 }
 
 unsigned DataLayout::getIndexSize(unsigned AS) const {
-  return divideCeil(getPointerAlignElem(AS).IndexBitWidth, 8);
+  return divideCeil(getPointerSpec(AS).IndexBitWidth, 8);
 }
 
 unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const {
@@ -819,8 +792,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
   case Type::FP128TyID:
   case Type::X86_FP80TyID: {
     unsigned BitWidth = getTypeSizeInBits(Ty).getFixedValue();
-    auto I = findAlignmentLowerBound(FloatAlignments, BitWidth);
-    if (I != FloatAlignments.end() && I->TypeBitWidth == BitWidth)
+    auto I = findPrimitiveSpecLowerBound(FloatSpecs, BitWidth);
+    if (I != FloatSpecs.end() && I->BitWidth == BitWidth)
       return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 
     // If we still couldn't find a reasonable default alignment, fall back
@@ -834,8 +807,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
   case Type::FixedVectorTyID:
   case Type::ScalableVectorTyID: {
     unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue();
-    auto I = findAlignmentLowerBound(VectorAlignments, BitWidth);
-    if (I != VectorAlignments.end() && I->TypeBitWidth == BitWidth)
+    auto I = findPrimitiveSpecLowerBound(VectorSpecs, BitWidth);
+    if (I != VectorSpecs.end() && I->BitWidth == BitWidth)
       return abi_or_pref ? I->ABIAlign : I->PrefAlign;
 
     // By default, use natural alignment for vector types. This is consistent

From 3d06de544b8397a6b93a4fdb52650579237b77fa Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Thu, 15 Aug 2024 09:07:02 +0100
Subject: [PATCH 38/47] [lldb] Remove Phabricator usernames from Code Owners
 file (#102590)

Removing them simplifies the content and means we don't confuse anyone
who joined after the Phabricator shutdown.

You could use them for review archaeology, but this is only a subset of
the names you'd encounter there anyway, so I don't think that is a good
reason to keep them here. With a couple of exceptions, the
Phabricator/GitHub names are the same and/or related to the person's
full name anyway.
---
 lldb/CodeOwners.rst | 96 ++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/lldb/CodeOwners.rst b/lldb/CodeOwners.rst
index 52e3e550523e5b..3c10c2a28da9e7 100644
--- a/lldb/CodeOwners.rst
+++ b/lldb/CodeOwners.rst
@@ -17,7 +17,7 @@ assistance.
 All parts of LLDB not covered by someone else
 ----------------------------------------------
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 Components
 ----------
@@ -27,100 +27,100 @@ LLDB.
 ABI
 ~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 
 Breakpoint
 ~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 CMake & Build System
 ~~~~~~~~~~~~~~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Alex Langford
-| alangford\@apple.com (email), bulbazord (Phabricator), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord)
+| alangford\@apple.com (email), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord)
 
 Commands
 ~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Expression Parser
 ~~~~~~~~~~~~~~~~~
 | Michael Buch
-| michaelbuch12\@gmail.com (email), Michael137 (Phabricator), Michael137 (GitHub), Michael137 (Discourse)
+| michaelbuch12\@gmail.com (email), Michael137 (GitHub), Michael137 (Discourse)
 
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Interpreter
 ~~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 
 Lua
 ~~~
 | Jonas Delvieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 Python
 ~~~~~~
 | Med Ismail Bennani
-| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
+| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
 
 Target/Process Control
 ~~~~~~~~~~~~~~~~~~~~~~
 | Med Ismail Bennani
-| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
+| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord)
 
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Test Suite
 ~~~~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 Trace
 ~~~~~
 | Walter Erquinigo
-| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
+| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
 
 Unwinding
 ~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 Utility
 ~~~~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 ValueObject
 ~~~~~~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 Watchpoints
 ~~~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 File Formats
 ------------
@@ -130,54 +130,54 @@ info formats.
 (PE)COFF
 ~~~~~~~~
 | Saleem Abdulrasool
-| compnerd\@compnerd.org (email), compnerd (Phabricator), compnerd (GitHub), compnerd (Discourse), compnerd (Discord)
+| compnerd\@compnerd.org (email), compnerd (GitHub), compnerd (Discourse), compnerd (Discord)
 
 Breakpad
 ~~~~~~~~
 | Zequan Wu
-| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse)
+| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 CTF
 ~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 DWARF
 ~~~~~
 | Adrian Prantl
-| aprantl\@apple.com (email), aprantl (Phabricator), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse)
+| aprantl\@apple.com (email), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse)
 
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 ELF
 ~~~
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 JSON
 ~~~~
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 MachO
 ~~~~~
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 PDB
 ~~~
 | Zequan Wu
-| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse)
+| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse)
 
 Platforms
 ---------
@@ -186,36 +186,36 @@ The following people are responsible for decisions involving platforms.
 Android
 ~~~~~~~
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 Darwin
 ~~~~~~
 | Jim Ingham
-| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse)
+| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse)
 
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 | Jonas Devlieghere
-| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
+| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord)
 
 FreeBSD
 ~~~~~~~
 | Ed Maste
-| emaste\@freebsd.org (email), emaste (Phabricator), emaste (GitHub), emaste (Discourse), emaste (Discord)
+| emaste\@freebsd.org (email), emaste (GitHub), emaste (Discourse), emaste (Discord)
 
 Linux
 ~~~~~
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 Windows
 ~~~~~~~
 | Omair Javaid
-| omair.javaid\@linaro.org (email), omjavaid (Phabricator), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord)
+| omair.javaid\@linaro.org (email), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord)
 
 Tools
 -----
@@ -224,23 +224,23 @@ The following people are responsible for decisions involving specific tools.
 debugserver
 ~~~~~~~~~~~
 | Jason Molenda
-| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
+| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord)
 
 lldb-server
 ~~~~~~~~~~~
 | David Spickett
-| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
+| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord)
 
 | Pavel Labath
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse)
+| pavel\@labath.sk (email), labath (GitHub), labath (Discourse)
 
 lldb-dap
 ~~~~~~~~
 | Greg Clayton
-| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse)
+| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse)
 
 | Walter Erquinigo
-| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
+| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord)
 
 Former Code Owners
 ==================

From 6d9cae12bdfcaeb2be10866e20e8883242f02c6c Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@gcc.gnu.org>
Date: Thu, 15 Aug 2024 10:07:28 +0200
Subject: [PATCH 39/47] [flang][test] Run Driver/fveclib-codegen.f90 for
 aarch64 and x86_64 (#103730)

`Flang :: Driver/fveclib-codegen.f90` currently `FAIL`s on SPARC, both
Solaris/sparcv9 and Linux/sparc64:
```
bin/flang-new -S -Ofast -fveclib=LIBMVEC -o - /vol/llvm/src/llvm-project/local/flang/test/Driver/fveclib-codegen.f90

flang/test/Driver/fveclib-codegen.f90:11:10: error: CHECK: expected string not found in input
! CHECK: _ZGVbN4vv_powf
         ^
```
The code in question only contains calls to `powf`. Given that `glibc`
only supports `libmvec` on `aarch64` and `x86_64`, this test targets
only those if possible.

Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`,
`amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`.
---
 flang/test/Driver/fveclib-codegen.f90 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/test/Driver/fveclib-codegen.f90 b/flang/test/Driver/fveclib-codegen.f90
index 8d7d3af1e8f9ba..3a96c29ac70854 100644
--- a/flang/test/Driver/fveclib-codegen.f90
+++ b/flang/test/Driver/fveclib-codegen.f90
@@ -1,6 +1,7 @@
 ! test that -fveclib= is passed to the backend
 ! -target aarch64 so that ArmPL is available
-! RUN: %flang -S -Ofast -fveclib=LIBMVEC -o - %s | FileCheck %s
+! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %}
+! RUN: %if x86-registered-target %{ %flang -S -Ofast -target x86_64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %}
 ! RUN: %flang -S -Ofast -fveclib=NoLibrary -o - %s | FileCheck %s --check-prefix=NOLIB
 
 subroutine sb(a, b)

From 6f6422f4a2b8647a59936c131e50a79906d89510 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Thu, 15 Aug 2024 08:14:13 +0000
Subject: [PATCH 40/47] [libcxx][test] Mark sys_info zdump test unsupported on
 32-bit Arm Linux

Until https://github.com/llvm/llvm-project/pull/103056 lands
or another more appropriate check can be found.

This test fails on Ubuntu Focal where zdump is built with 32-bit time_t
but passes on Ubuntu Jammy where zdump is built with 64-bit time_t.

Marking it unsupported means Linaro can upgrade its bots to Ubuntu
Jammy without getting an unexpected pass.
---
 .../time.zone.members/sys_info.zdump.pass.cpp                   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
index 207f8e4df45413..2b97d9a5bc745b 100644
--- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
+++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp
@@ -14,7 +14,7 @@
 // XFAIL: availability-tzdb-missing
 
 // TODO TZDB Investigate
-// XFAIL: target={{armv(7|8)l-linux-gnueabihf}}
+// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}}
 
 #include <chrono>
 #include <format>

From 141536544f4ec1d1bf24256157f4ff1a3bc07dae Mon Sep 17 00:00:00 2001
From: Christian Ulmann <christianulmann@gmail.com>
Date: Thu, 15 Aug 2024 10:30:44 +0200
Subject: [PATCH 41/47] [MLIR][LLVM]: Add an IR utility to perform slice
 walking (#103053)

This commit introduces a slicing utility that can be used to walk
arbitrary IR slices. It additionally ships logic to determine control
flow predecessors, which allows users to walk backward slices without
dealing with both `RegionBranchOpInterface` and `BranchOpInterface`.

This utility is used to improve the `noalias` propagation in the LLVM
dialect's inliner interface. Before this change, it broke down as soon
as pointers were passed through region control flow operations.
---
 mlir/include/mlir/Analysis/SliceWalk.h        |  98 ++++++++++++
 mlir/lib/Analysis/CMakeLists.txt              |   1 +
 mlir/lib/Analysis/SliceWalk.cpp               | 139 ++++++++++++++++++
 .../Transforms/InlinerInterfaceImpl.cpp       | 117 ++++++---------
 .../Dialect/LLVMIR/inlining-alias-scopes.mlir |  54 +++++++
 5 files changed, 333 insertions(+), 76 deletions(-)
 create mode 100644 mlir/include/mlir/Analysis/SliceWalk.h
 create mode 100644 mlir/lib/Analysis/SliceWalk.cpp

diff --git a/mlir/include/mlir/Analysis/SliceWalk.h b/mlir/include/mlir/Analysis/SliceWalk.h
new file mode 100644
index 00000000000000..481c5690c533ba
--- /dev/null
+++ b/mlir/include/mlir/Analysis/SliceWalk.h
@@ -0,0 +1,98 @@
+//===- SliceWalk.h - Helpers for performing IR slice walks ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_ANALYSIS_SLICEWALK_H
+#define MLIR_ANALYSIS_SLICEWALK_H
+
+#include "mlir/IR/ValueRange.h"
+
+namespace mlir {
+
+/// A class to signal how to proceed with the walk of the backward slice:
+/// - Interrupt: Stops the walk.
+/// - AdvanceTo: Continues the walk to user-specified values.
+/// - Skip: Continues the walk, but skips the predecessors of the current value.
+class WalkContinuation {
+public:
+  enum class WalkAction {
+    /// Stops the walk.
+    Interrupt,
+    /// Continues the walk to user-specified values.
+    AdvanceTo,
+    /// Continues the walk, but skips the predecessors of the current value.
+    Skip
+  };
+
+  WalkContinuation(WalkAction action, mlir::ValueRange nextValues)
+      : action(action), nextValues(nextValues) {}
+
+  /// Allows diagnostics to interrupt the walk.
+  explicit WalkContinuation(mlir::Diagnostic &&)
+      : action(WalkAction::Interrupt) {}
+
+  /// Allows diagnostics to interrupt the walk.
+  explicit WalkContinuation(mlir::InFlightDiagnostic &&)
+      : action(WalkAction::Interrupt) {}
+
+  /// Creates a continuation that interrupts the walk.
+  static WalkContinuation interrupt() {
+    return WalkContinuation(WalkAction::Interrupt, {});
+  }
+
+  /// Creates a continuation that adds the user-specified `nextValues` to the
+  /// work list and advances the walk.
+  static WalkContinuation advanceTo(mlir::ValueRange nextValues) {
+    return WalkContinuation(WalkAction::AdvanceTo, nextValues);
+  }
+
+  /// Creates a continuation that advances the walk without adding any
+  /// predecessor values to the work list.
+  static WalkContinuation skip() {
+    return WalkContinuation(WalkAction::Skip, {});
+  }
+
+  /// Returns true if the walk was interrupted.
+  bool wasInterrupted() const { return action == WalkAction::Interrupt; }
+
+  /// Returns true if the walk was skipped.
+  bool wasSkipped() const { return action == WalkAction::Skip; }
+
+  /// Returns true if the walk was advanced to user-specified values.
+  bool wasAdvancedTo() const { return action == WalkAction::AdvanceTo; }
+
+  /// Returns the next values to continue the walk with.
+  mlir::ArrayRef<mlir::Value> getNextValues() const { return nextValues; }
+
+private:
+  WalkAction action;
+  /// The next values to continue the walk with.
+  mlir::SmallVector<mlir::Value> nextValues;
+};
+
+/// A callback that is invoked for each value encountered during the walk of the
+/// slice. The callback takes the current value, and returns the walk
+/// continuation, which determines if the walk should proceed and if yes, with
+/// which values.
+using WalkCallback = mlir::function_ref<WalkContinuation(mlir::Value)>;
+
+/// Walks the slice starting from the `rootValues` using a depth-first
+/// traversal. The walk calls the provided `walkCallback` for each value
+/// encountered in the slice and uses the returned walk continuation to
+/// determine how to proceed.
+WalkContinuation walkSlice(mlir::ValueRange rootValues,
+                           WalkCallback walkCallback);
+
+/// Computes a vector of all control predecessors of `value`. Relies on
+/// RegionBranchOpInterface and BranchOpInterface to determine predecessors.
+/// Returns nullopt if `value` has no predecessors or when the relevant
+/// operations are missing the interface implementations.
+std::optional<SmallVector<Value>> getControlFlowPredecessors(Value value);
+
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_SLICEWALK_H
diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt
index 38d8415d81c72d..609cb34309829e 100644
--- a/mlir/lib/Analysis/CMakeLists.txt
+++ b/mlir/lib/Analysis/CMakeLists.txt
@@ -29,6 +29,7 @@ add_mlir_library(MLIRAnalysis
   Liveness.cpp
   CFGLoopInfo.cpp
   SliceAnalysis.cpp
+  SliceWalk.cpp
   TopologicalSortUtils.cpp
 
   AliasAnalysis/LocalAliasAnalysis.cpp
diff --git a/mlir/lib/Analysis/SliceWalk.cpp b/mlir/lib/Analysis/SliceWalk.cpp
new file mode 100644
index 00000000000000..9d770639dc53ca
--- /dev/null
+++ b/mlir/lib/Analysis/SliceWalk.cpp
@@ -0,0 +1,139 @@
+#include "mlir/Analysis/SliceWalk.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+
+using namespace mlir;
+
+WalkContinuation mlir::walkSlice(ValueRange rootValues,
+                                 WalkCallback walkCallback) {
+  // Search the backward slice starting from the root values.
+  SmallVector<Value> workList = rootValues;
+  llvm::SmallDenseSet<Value, 16> seenValues;
+  while (!workList.empty()) {
+    // Search the backward slice of the current value.
+    Value current = workList.pop_back_val();
+
+    // Skip the current value if it has already been seen.
+    if (!seenValues.insert(current).second)
+      continue;
+
+    // Call the walk callback with the current value.
+    WalkContinuation continuation = walkCallback(current);
+    if (continuation.wasInterrupted())
+      return continuation;
+    if (continuation.wasSkipped())
+      continue;
+
+    assert(continuation.wasAdvancedTo());
+    // Add the next values to the work list if the walk should continue.
+    workList.append(continuation.getNextValues().begin(),
+                    continuation.getNextValues().end());
+  }
+
+  return WalkContinuation::skip();
+}
+
+/// Returns the operands from all predecessor regions that match `operandNumber`
+/// for the `successor` region within `regionOp`.
+static SmallVector<Value>
+getRegionPredecessorOperands(RegionBranchOpInterface regionOp,
+                             RegionSuccessor successor,
+                             unsigned operandNumber) {
+  SmallVector<Value> predecessorOperands;
+
+  // Returns true if `successors` contains `successor`.
+  auto isContained = [](ArrayRef<RegionSuccessor> successors,
+                        RegionSuccessor successor) {
+    auto *it = llvm::find_if(successors, [&successor](RegionSuccessor curr) {
+      return curr.getSuccessor() == successor.getSuccessor();
+    });
+    return it != successors.end();
+  };
+
+  // Search the operand ranges on the region operation itself.
+  SmallVector<Attribute> operandAttributes(regionOp->getNumOperands());
+  SmallVector<RegionSuccessor> successors;
+  regionOp.getEntrySuccessorRegions(operandAttributes, successors);
+  if (isContained(successors, successor)) {
+    OperandRange operands = regionOp.getEntrySuccessorOperands(successor);
+    predecessorOperands.push_back(operands[operandNumber]);
+  }
+
+  // Search the operand ranges on region terminators.
+  for (Region &region : regionOp->getRegions()) {
+    for (Block &block : region) {
+      auto terminatorOp =
+          dyn_cast<RegionBranchTerminatorOpInterface>(block.getTerminator());
+      if (!terminatorOp)
+        continue;
+      SmallVector<Attribute> operandAttributes(terminatorOp->getNumOperands());
+      SmallVector<RegionSuccessor> successors;
+      terminatorOp.getSuccessorRegions(operandAttributes, successors);
+      if (isContained(successors, successor)) {
+        OperandRange operands = terminatorOp.getSuccessorOperands(successor);
+        predecessorOperands.push_back(operands[operandNumber]);
+      }
+    }
+  }
+
+  return predecessorOperands;
+}
+
+/// Returns the predecessor branch operands that match `blockArg`, or nullopt if
+/// some of the predecessor terminators do not implement the BranchOpInterface.
+static std::optional<SmallVector<Value>>
+getBlockPredecessorOperands(BlockArgument blockArg) {
+  Block *block = blockArg.getOwner();
+
+  // Search the predecessor operands for all predecessor terminators.
+  SmallVector<Value> predecessorOperands;
+  for (auto it = block->pred_begin(); it != block->pred_end(); ++it) {
+    Block *predecessor = *it;
+    auto branchOp = dyn_cast<BranchOpInterface>(predecessor->getTerminator());
+    if (!branchOp)
+      return std::nullopt;
+    SuccessorOperands successorOperands =
+        branchOp.getSuccessorOperands(it.getSuccessorIndex());
+    // Store the predecessor operand if the block argument matches an operand
+    // and is not produced by the terminator.
+    if (Value operand = successorOperands[blockArg.getArgNumber()])
+      predecessorOperands.push_back(operand);
+  }
+
+  return predecessorOperands;
+}
+
+std::optional<SmallVector<Value>>
+mlir::getControlFlowPredecessors(Value value) {
+  SmallVector<Value> result;
+  if (OpResult opResult = dyn_cast<OpResult>(value)) {
+    auto regionOp = dyn_cast<RegionBranchOpInterface>(opResult.getOwner());
+    // If the interface is not implemented, there are no control flow
+    // predecessors to work with.
+    if (!regionOp)
+      return std::nullopt;
+    // Collect the control flow predecessor operands of this op result.
+    RegionSuccessor region(regionOp->getResults());
+    SmallVector<Value> predecessorOperands = getRegionPredecessorOperands(
+        regionOp, region, opResult.getResultNumber());
+    return predecessorOperands;
+  }
+
+  auto blockArg = cast<BlockArgument>(value);
+  Block *block = blockArg.getOwner();
+  // Search the region predecessor operands for structured control flow.
+  if (block->isEntryBlock()) {
+    if (auto regionBranchOp =
+            dyn_cast<RegionBranchOpInterface>(block->getParentOp())) {
+      RegionSuccessor region(blockArg.getParentRegion());
+      SmallVector<Value> predecessorOperands = getRegionPredecessorOperands(
+          regionBranchOp, region, blockArg.getArgNumber());
+      return predecessorOperands;
+    }
+    // If the interface is not implemented, there are no control flow
+    // predecessors to work with.
+    return std::nullopt;
+  }
+
+  // Search the block predecessor operands for unstructured control flow.
+  return getBlockPredecessorOperands(blockArg);
+}
diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
index 8eba76a9abee8d..504f63b48c9433 100644
--- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.h"
+#include "mlir/Analysis/SliceWalk.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/DataLayoutInterfaces.h"
@@ -221,86 +222,45 @@ static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) {
   return ArrayAttr::get(lhs.getContext(), result);
 }
 
-/// Attempts to return the underlying pointer value that `pointerValue` is based
-/// on. This traverses down the chain of operations to the last operation
-/// producing the base pointer and returns it. If it encounters an operation it
-/// cannot further traverse through, returns the operation's result.
-static Value getUnderlyingObject(Value pointerValue) {
-  while (true) {
-    if (auto gepOp = pointerValue.getDefiningOp<LLVM::GEPOp>()) {
-      pointerValue = gepOp.getBase();
-      continue;
-    }
-
-    if (auto addrCast = pointerValue.getDefiningOp<LLVM::AddrSpaceCastOp>()) {
-      pointerValue = addrCast.getOperand();
-      continue;
-    }
-
-    break;
-  }
-
-  return pointerValue;
-}
-
 /// Attempts to return the set of all underlying pointer values that
 /// `pointerValue` is based on. This function traverses through select
-/// operations and block arguments unlike getUnderlyingObject.
-static SmallVector<Value> getUnderlyingObjectSet(Value pointerValue) {
+/// operations and block arguments.
+static FailureOr<SmallVector<Value>>
+getUnderlyingObjectSet(Value pointerValue) {
   SmallVector<Value> result;
-
-  SmallVector<Value> workList{pointerValue};
-  // Avoid dataflow loops.
-  SmallPtrSet<Value, 4> seen;
-  do {
-    Value current = workList.pop_back_val();
-    current = getUnderlyingObject(current);
-
-    if (!seen.insert(current).second)
-      continue;
-
-    if (auto selectOp = current.getDefiningOp<LLVM::SelectOp>()) {
-      workList.push_back(selectOp.getTrueValue());
-      workList.push_back(selectOp.getFalseValue());
-      continue;
+  WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) {
+    if (auto gepOp = val.getDefiningOp<LLVM::GEPOp>())
+      return WalkContinuation::advanceTo(gepOp.getBase());
+
+    if (auto addrCast = val.getDefiningOp<LLVM::AddrSpaceCastOp>())
+      return WalkContinuation::advanceTo(addrCast.getOperand());
+
+    // TODO: Add a SelectLikeOpInterface and use it in the slicing utility.
+    if (auto selectOp = val.getDefiningOp<LLVM::SelectOp>())
+      return WalkContinuation::advanceTo(
+          {selectOp.getTrueValue(), selectOp.getFalseValue()});
+
+    // Attempt to advance to control flow predecessors.
+    std::optional<SmallVector<Value>> controlFlowPredecessors =
+        getControlFlowPredecessors(val);
+    if (controlFlowPredecessors)
+      return WalkContinuation::advanceTo(*controlFlowPredecessors);
+
+    // For all non-control flow results, consider `val` an underlying object.
+    if (isa<OpResult>(val)) {
+      result.push_back(val);
+      return WalkContinuation::skip();
     }
 
-    if (auto blockArg = dyn_cast<BlockArgument>(current)) {
-      Block *parentBlock = blockArg.getParentBlock();
-
-      // Attempt to find all block argument operands for every predecessor.
-      // If any operand to the block argument wasn't found in a predecessor,
-      // conservatively add the block argument to the result set.
-      SmallVector<Value> operands;
-      bool anyUnknown = false;
-      for (auto iter = parentBlock->pred_begin();
-           iter != parentBlock->pred_end(); iter++) {
-        auto branch = dyn_cast<BranchOpInterface>((*iter)->getTerminator());
-        if (!branch) {
-          result.push_back(blockArg);
-          anyUnknown = true;
-          break;
-        }
-
-        Value operand = branch.getSuccessorOperands(
-            iter.getSuccessorIndex())[blockArg.getArgNumber()];
-        if (!operand) {
-          result.push_back(blockArg);
-          anyUnknown = true;
-          break;
-        }
-
-        operands.push_back(operand);
-      }
-
-      if (!anyUnknown)
-        llvm::append_range(workList, operands);
-
-      continue;
-    }
+    // If this place is reached, `val` is a block argument that is not
+    // understood. Therefore, we conservatively interrupt.
+    // Note: Dealing with function arguments is not necessary, as the slice
+    // would have to go through an SSACopyOp first.
+    return WalkContinuation::interrupt();
+  });
 
-    result.push_back(current);
-  } while (!workList.empty());
+  if (walkResult.wasInterrupted())
+    return failure();
 
   return result;
 }
@@ -363,9 +323,14 @@ static void createNewAliasScopesFromNoAliasParameter(
 
       // Find the set of underlying pointers that this pointer is based on.
       SmallPtrSet<Value, 4> basedOnPointers;
-      for (Value pointer : pointerArgs)
-        llvm::copy(getUnderlyingObjectSet(pointer),
+      for (Value pointer : pointerArgs) {
+        FailureOr<SmallVector<Value>> underlyingObjectSet =
+            getUnderlyingObjectSet(pointer);
+        if (failed(underlyingObjectSet))
+          return;
+        llvm::copy(*underlyingObjectSet,
                    std::inserter(basedOnPointers, basedOnPointers.begin()));
+      }
 
       bool aliasesOtherKnownObject = false;
       // Go through the based on pointers and check that they are either:
diff --git a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
index 0b8b60e963bb01..a91b991c5ed2b9 100644
--- a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir
@@ -296,6 +296,60 @@ llvm.func @bar(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
 
 llvm.func @random() -> i1
 
+llvm.func @region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
+  %0 = llvm.mlir.constant(5 : i64) : i32
+  test.region_if %arg0: !llvm.ptr -> !llvm.ptr then {
+  ^bb0(%arg2: !llvm.ptr):
+    test.region_if_yield %arg0 : !llvm.ptr
+  } else {
+  ^bb0(%arg2: !llvm.ptr):
+    test.region_if_yield %arg0 : !llvm.ptr
+  } join {
+  ^bb0(%arg2: !llvm.ptr):
+    llvm.store %0, %arg2 : i32, !llvm.ptr
+    test.region_if_yield %arg0 : !llvm.ptr
+  }
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @region_branch_inlining
+// CHECK: llvm.store
+// CHECK-SAME: alias_scopes = [#[[$ARG0_SCOPE]]]
+// CHECK-SAME: noalias_scopes = [#[[$ARG1_SCOPE]]]
+llvm.func @region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+  llvm.call @region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+llvm.func @missing_region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
+  %0 = llvm.mlir.constant(5 : i64) : i32
+  "test.one_region_op"() ({
+  ^bb0(%arg2: !llvm.ptr):
+    llvm.store %0, %arg2 : i32, !llvm.ptr
+    "test.terminator"() : () -> ()
+  }) : () -> ()
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @missing_region_branch_inlining
+// CHECK: llvm.store
+// CHECK-NOT: alias_scopes
+// CHECK-NOT: noalias_scopes
+llvm.func @missing_region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) {
+  llvm.call @missing_region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+// CHECK-DAG: #[[DOMAIN:.*]] = #llvm.alias_scope_domain<{{.*}}>
+// CHECK-DAG: #[[$ARG0_SCOPE:.*]] = #llvm.alias_scope<id = {{.*}}, domain = #[[DOMAIN]]{{(,.*)?}}>
+// CHECK-DAG: #[[$ARG1_SCOPE:.*]] = #llvm.alias_scope<id = {{.*}}, domain = #[[DOMAIN]]{{(,.*)?}}>
+
+llvm.func @random() -> i1
+
 llvm.func @block_arg(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) {
   %0 = llvm.mlir.constant(5 : i64) : i32
   %1 = llvm.mlir.constant(1 : i64) : i64

From 5f15c1776a462940464743dbc9e82c46fe7e14aa Mon Sep 17 00:00:00 2001
From: Yanzuo Liu <zwuis@outlook.com>
Date: Thu, 15 Aug 2024 16:46:07 +0800
Subject: [PATCH 42/47] [llvm][Docs] `_or_null` -> `_if_present` in
 Programmer's Manual (#98586)

`cast_or_null` is deprecated.

https://github.com/llvm/llvm-project/blob/062844615db5e141da118c1ad780bf102537f40a/llvm/include/llvm/Support/Casting.h#L717-L722
---
 llvm/docs/ProgrammersManual.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 231de56ef4cfee..41d1388e5bf7e9 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -164,20 +164,20 @@ rarely have to include this file directly).
   efficient to use the ``InstVisitor`` class to dispatch over the instruction
   type directly.
 
-``isa_and_nonnull<>``:
-  The ``isa_and_nonnull<>`` operator works just like the ``isa<>`` operator,
+``isa_and_present<>``:
+  The ``isa_and_present<>`` operator works just like the ``isa<>`` operator,
   except that it allows for a null pointer as an argument (which it then
   returns false).  This can sometimes be useful, allowing you to combine several
   null checks into one.
 
-``cast_or_null<>``:
-  The ``cast_or_null<>`` operator works just like the ``cast<>`` operator,
+``cast_if_present<>``:
+  The ``cast_if_present<>`` operator works just like the ``cast<>`` operator,
   except that it allows for a null pointer as an argument (which it then
   propagates).  This can sometimes be useful, allowing you to combine several
   null checks into one.
 
-``dyn_cast_or_null<>``:
-  The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>``
+``dyn_cast_if_present<>``:
+  The ``dyn_cast_if_present<>`` operator works just like the ``dyn_cast<>``
   operator, except that it allows for a null pointer as an argument (which it
   then propagates).  This can sometimes be useful, allowing you to combine
   several null checks into one.

From 05dfac23f1121aabb9675a38628e919689f993b0 Mon Sep 17 00:00:00 2001
From: Jorge Botto <23462171+jf-botto@users.noreply.github.com>
Date: Thu, 15 Aug 2024 09:49:40 +0100
Subject: [PATCH 43/47] [DAG] Adding m_FPToUI and m_FPToSI to SDPatternMatch.h
 (#104044)

Adds m_FPToUI/m_FPToSI matchers for ISD::FP_TO_UINT/ISD::FP_TO_SINT in SDPatternMatch.h with suitable test coverage.

Fixes https://github.com/llvm/llvm-project/issues/103872
---
 llvm/include/llvm/CodeGen/SDPatternMatch.h             |  8 ++++++++
 .../unittests/CodeGen/SelectionDAGPatternMatchTest.cpp | 10 ++++++++++
 2 files changed, 18 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 96ece1559bc437..88ddd43a2a8913 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -737,6 +737,14 @@ template <typename Opnd> inline UnaryOpc_match<Opnd> m_VScale(const Opnd &Op) {
   return UnaryOpc_match<Opnd>(ISD::VSCALE, Op);
 }
 
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_FPToUI(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::FP_TO_UINT, Op);
+}
+
+template <typename Opnd> inline UnaryOpc_match<Opnd> m_FPToSI(const Opnd &Op) {
+  return UnaryOpc_match<Opnd>(ISD::FP_TO_SINT, Op);
+}
+
 // === Constants ===
 struct ConstantInt_match {
   APInt *BindVal;
diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
index 074247e6e7d184..b9414be98623af 100644
--- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
+++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp
@@ -238,9 +238,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
   SDLoc DL;
   auto Int32VT = EVT::getIntegerVT(Context, 32);
   auto Int64VT = EVT::getIntegerVT(Context, 64);
+  auto FloatVT = EVT::getFloatingPointVT(32);
 
   SDValue Op0 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT);
   SDValue Op1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int64VT);
+  SDValue Op2 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, FloatVT);
 
   SDValue ZExt = DAG->getNode(ISD::ZERO_EXTEND, DL, Int64VT, Op0);
   SDValue SExt = DAG->getNode(ISD::SIGN_EXTEND, DL, Int64VT, Op0);
@@ -252,6 +254,9 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
 
   SDValue VScale = DAG->getVScale(DL, Int32VT, APInt::getMaxValue(32));
 
+  SDValue FPToSI = DAG->getNode(ISD::FP_TO_SINT, DL, FloatVT, Op2);
+  SDValue FPToUI = DAG->getNode(ISD::FP_TO_UINT, DL, FloatVT, Op2);
+
   using namespace SDPatternMatch;
   EXPECT_TRUE(sd_match(ZExt, m_UnaryOp(ISD::ZERO_EXTEND, m_Value())));
   EXPECT_TRUE(sd_match(SExt, m_SExt(m_Value())));
@@ -263,6 +268,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) {
   EXPECT_FALSE(sd_match(Sub, m_Neg(m_Value())));
   EXPECT_FALSE(sd_match(Neg, m_Not(m_Value())));
   EXPECT_TRUE(sd_match(VScale, m_VScale(m_Value())));
+
+  EXPECT_TRUE(sd_match(FPToUI, m_FPToUI(m_Value())));
+  EXPECT_TRUE(sd_match(FPToSI, m_FPToSI(m_Value())));
+  EXPECT_FALSE(sd_match(FPToUI, m_FPToSI(m_Value())));
+  EXPECT_FALSE(sd_match(FPToSI, m_FPToUI(m_Value())));
 }
 
 TEST_F(SelectionDAGPatternMatchTest, matchConstants) {

From 9a9ce9112fd7ed87d305d3e4f9b05c98a04f2382 Mon Sep 17 00:00:00 2001
From: Haojian Wu <hokein.wu@gmail.com>
Date: Thu, 15 Aug 2024 11:01:26 +0200
Subject: [PATCH 44/47] [bazel] Port for
 141536544f4ec1d1bf24256157f4ff1a3bc07dae

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index ac9e311ad5110e..7273429d4f344f 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5425,8 +5425,9 @@ cc_library(
     includes = ["include"],
     deps = [
 	":DataLayoutInterfaces",
+        ":Analysis",
         ":FuncDialect",
-	":InliningUtils",
+	    ":InliningUtils",
         ":IR",
         ":LLVMDialect",
         ":LLVMPassIncGen",

From 7227b44f928a87b5d7fb05bd1539fdfb6d4958dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= <xazax.hun@gmail.com>
Date: Thu, 15 Aug 2024 10:04:59 +0100
Subject: [PATCH 45/47] [clang][driver] Fix -print-target-triple OS version for
 apple targets (#104037)

The target needs to be initialized in order to compute the correct
target triple from the command line. Without initialized targets the OS
component of the triple might not reflect what would be computed by the
driver for an actual compiler invocation.

Fixes https://github.com/llvm/llvm-project/issues/61762
---
 clang/lib/Driver/Driver.cpp                   | 13 ++++--
 .../test/Driver/darwin-print-target-triple.c  | 42 +++++++++++++++++++
 2 files changed, 52 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/Driver/darwin-print-target-triple.c

diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index e12416e51f8d24..5b95019c25cab6 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -2271,8 +2271,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
     return false;
   }
 
-  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
-    ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
+  auto initializeTargets = [&]() {
     const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
     // The 'Darwin' toolchain is initialized only when its arguments are
     // computed. Get the default arguments for OFK_None to ensure that
@@ -2282,6 +2281,12 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
     // FIXME: For some more esoteric targets the default toolchain is not the
     //        correct one.
     C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_None);
+    return Triple;
+  };
+
+  if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) {
+    ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs());
+    const llvm::Triple Triple = initializeTargets();
     RegisterEffectiveTriple TripleRAII(TC, Triple);
     switch (RLT) {
     case ToolChain::RLT_CompilerRT:
@@ -2325,7 +2330,9 @@ bool Driver::HandleImmediateArgs(Compilation &C) {
   }
 
   if (C.getArgs().hasArg(options::OPT_print_target_triple)) {
-    llvm::outs() << TC.getTripleString() << "\n";
+    initializeTargets();
+    llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs()));
+    llvm::outs() << Triple.getTriple() << "\n";
     return false;
   }
 
diff --git a/clang/test/Driver/darwin-print-target-triple.c b/clang/test/Driver/darwin-print-target-triple.c
new file mode 100644
index 00000000000000..4f5fdfe9d0db34
--- /dev/null
+++ b/clang/test/Driver/darwin-print-target-triple.c
@@ -0,0 +1,42 @@
+// Test the output of -print-target-triple on Darwin.
+// See https://github.com/llvm/llvm-project/issues/61762
+
+//
+// All platforms
+//
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=x86_64-apple-macos -mmacos-version-min=15 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-MACOS %s
+// CHECK-CLANGRT-MACOS: x86_64-apple-macosx15.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-ios -mios-version-min=9 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-IOS %s
+// CHECK-CLANGRT-IOS: arm64-apple-ios9.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-watchos -mwatchos-version-min=3 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS %s
+// CHECK-CLANGRT-WATCHOS: arm64-apple-watchos3.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=armv7k-apple-watchos -mwatchos-version-min=3 \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS-ARMV7K %s
+// CHECK-CLANGRT-WATCHOS-ARMV7K: thumbv7-apple-watchos3.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-tvos -mtvos-version-min=1\
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-TVOS %s
+// CHECK-CLANGRT-TVOS: arm64-apple-tvos1.0.0
+
+// RUN: %clang -print-target-triple \
+// RUN:     --target=arm64-apple-driverkit \
+// RUN:     -resource-dir=%S/Inputs/resource_dir 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-CLANGRT-DRIVERKIT %s
+// CHECK-CLANGRT-DRIVERKIT: arm64-apple-driverkit19.0.0

From 3a3990cb05858e7892a4825c677891a980f1cea8 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof@amd.com>
Date: Thu, 15 Aug 2024 10:15:26 +0100
Subject: [PATCH 46/47] [Flang][OpenMP] Move assert for wrapper syms and block
 args to genLoopNestOp (#103731)

This patch adds an assert to `genLoopNestOp` to ensure that the number
of symbols matches the number of corresponding loop wrapper entry block
arguments. This was previously checked by some of the callers, but it
makes more sense to move the check into the function itself and avoid
having to replicate it.
---
 flang/lib/Lower/OpenMP/OpenMP.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 95c55805dcabb3..64b581e8910d07 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -1371,6 +1371,9 @@ genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
               llvm::ArrayRef<const semantics::Symbol *> wrapperSyms,
               llvm::ArrayRef<mlir::BlockArgument> wrapperArgs,
               llvm::omp::Directive directive, DataSharingProcessor &dsp) {
+  assert(wrapperSyms.size() == wrapperArgs.size() &&
+         "Number of symbols and wrapper block arguments must match");
+
   auto ivCallback = [&](mlir::Operation *op) {
     genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs);
     return llvm::SmallVector<const semantics::Symbol *>(iv);
@@ -2083,8 +2086,6 @@ static void genCompositeDistributeSimd(
       llvm::concat<mlir::BlockArgument>(distributeOp.getRegion().getArguments(),
                                         simdOp.getRegion().getArguments()));
 
-  assert(wrapperArgs.empty() &&
-         "Block args for omp.simd and omp.distribute currently not expected");
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, /*wrapperSyms=*/{}, wrapperArgs,
                 llvm::omp::Directive::OMPD_distribute_simd, dsp);
@@ -2132,8 +2133,6 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter,
   auto wrapperArgs = llvm::to_vector(llvm::concat<mlir::BlockArgument>(
       wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments()));
 
-  assert(wsloopReductionSyms.size() == wrapperArgs.size() &&
-         "Number of symbols and wrapper block arguments must match");
   genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item,
                 loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs,
                 llvm::omp::Directive::OMPD_do_simd, dsp);

From 8107810cad24d41fe43c6777370c7b81ca83ad84 Mon Sep 17 00:00:00 2001
From: Justin Bogner <mail@justinbogner.com>
Date: Thu, 15 Aug 2024 12:16:52 +0300
Subject: [PATCH 47/47] [DirectX] Use a more consistent pass name for
 DXILTranslateMetadata

This updates the "dxil-metadata-emit" pass flag to be spelled
"dxil-translate-metadata" to better match the pass name.

Pull Request: https://github.com/llvm/llvm-project/pull/104249
---
 llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp     | 10 +++++-----
 llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll     |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll     |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll  |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll  |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll  |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll  |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll  |  2 +-
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll  |  3 +--
 llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll  |  2 +-
 llvm/test/CodeGen/DirectX/UAVMetadata.ll              |  2 +-
 llvm/test/CodeGen/DirectX/cbuf.ll                     |  2 +-
 llvm/test/CodeGen/DirectX/dxil_ver.ll                 |  2 +-
 llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll       |  2 +-
 llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll       |  2 +-
 llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll       |  2 +-
 llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll       |  2 +-
 llvm/test/CodeGen/DirectX/lib_entry.ll                |  2 +-
 llvm/test/Frontend/HLSL/empty_cs_entry.ll             |  2 +-
 20 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index 583bce0f50e700..21089a232783a5 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -29,7 +29,7 @@ class DXILTranslateMetadata : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
   explicit DXILTranslateMetadata() : ModulePass(ID) {}
 
-  StringRef getPassName() const override { return "DXIL Metadata Emit"; }
+  StringRef getPassName() const override { return "DXIL Translate Metadata"; }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
@@ -67,9 +67,9 @@ ModulePass *llvm::createDXILTranslateMetadataPass() {
   return new DXILTranslateMetadata();
 }
 
-INITIALIZE_PASS_BEGIN(DXILTranslateMetadata, "dxil-metadata-emit",
-                      "DXIL Metadata Emit", false, false)
+INITIALIZE_PASS_BEGIN(DXILTranslateMetadata, "dxil-translate-metadata",
+                      "DXIL Translate Metadata", false, false)
 INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper)
 INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper)
-INITIALIZE_PASS_END(DXILTranslateMetadata, "dxil-metadata-emit",
-                    "DXIL Metadata Emit", false, false)
+INITIALIZE_PASS_END(DXILTranslateMetadata, "dxil-translate-metadata",
+                    "DXIL Translate Metadata", false, false)
diff --git a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll
index b9a8e3e80567e0..318d5a6210eeea 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.0-vertex"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll
index fdd21d627829b9..fb54fa916f33f9 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.8-compute"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll
index d2625fc8b96a9d..96d04f948c9b83 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6-amplification"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
index 24eb0d608d8bb6..8cba445bcb01e8 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -dxil-prepare  %s | FileCheck %s  --check-prefix=REMOVE_EXTRA_ATTRIBUTE
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll
index 5c28c9305f01bf..662620cf9f95cb 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.6-geometry"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll
index e60023d1b3a5fb..b405f8e915a329 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.6-hull"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll
index 7f0bea95c04822..26f3d287242edd 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.3-library"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll
index dd033b9a9722b9..422d4add912f3f 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel6.6-mesh"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll
index 47da321df3e42e..cdb9a6f0f6a4f4 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll
@@ -1,5 +1,4 @@
-
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel5.0-pixel"
 
diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll
index dcc68586dd4b9f..6b3501cc1dbaf7 100644
--- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll
+++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s
 ; RUN: opt -S -passes="print<dxil-metadata>" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS
 target triple = "dxil-pc-shadermodel-vertex"
 
diff --git a/llvm/test/CodeGen/DirectX/UAVMetadata.ll b/llvm/test/CodeGen/DirectX/UAVMetadata.ll
index bdad9fd40c9bd3..b10112a044df58 100644
--- a/llvm/test/CodeGen/DirectX/UAVMetadata.ll
+++ b/llvm/test/CodeGen/DirectX/UAVMetadata.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 ; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
 ; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT
 
diff --git a/llvm/test/CodeGen/DirectX/cbuf.ll b/llvm/test/CodeGen/DirectX/cbuf.ll
index 38f08fad995d1f..e31a659728fcf2 100644
--- a/llvm/test/CodeGen/DirectX/cbuf.ll
+++ b/llvm/test/CodeGen/DirectX/cbuf.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD
 ; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
diff --git a/llvm/test/CodeGen/DirectX/dxil_ver.ll b/llvm/test/CodeGen/DirectX/dxil_ver.ll
index e9923a3abce02d..3c1d2e81020098 100644
--- a/llvm/test/CodeGen/DirectX/dxil_ver.ll
+++ b/llvm/test/CodeGen/DirectX/dxil_ver.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-pc-shadermodel6.3-library"
 
diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll
index 0cfb839746b93e..1a618092c5fed3 100644
--- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll
+++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-unknown-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll
index b6d29f8d18d79f..6886f2690209dc 100644
--- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll
+++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-unknown-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll
index d023d7906fdc52..3b08b25542201c 100644
--- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll
+++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-unknown-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll
index 38c2cd18b5ca1d..f01afbdab96733 100644
--- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll
+++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-unknown-shadermodel6.7-library"
diff --git a/llvm/test/CodeGen/DirectX/lib_entry.ll b/llvm/test/CodeGen/DirectX/lib_entry.ll
index 5254a088055888..9aa63c26ce845a 100644
--- a/llvm/test/CodeGen/DirectX/lib_entry.ll
+++ b/llvm/test/CodeGen/DirectX/lib_entry.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s
+; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
 target triple = "dxil-unknown-shadermodel6.7-library"
 
diff --git a/llvm/test/Frontend/HLSL/empty_cs_entry.ll b/llvm/test/Frontend/HLSL/empty_cs_entry.ll
index 45b0faeaa44d44..32736aeeb542ce 100644
--- a/llvm/test/Frontend/HLSL/empty_cs_entry.ll
+++ b/llvm/test/Frontend/HLSL/empty_cs_entry.ll
@@ -1,4 +1,4 @@
-; RUN: %if directx-registered-target %{ opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXIL-CHECK %}
+; RUN: %if directx-registered-target %{ opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXIL-CHECK %}
 ; RUN: %if spirv-registered-target   %{ llc %s -mtriple=spirv-unknown-unknown -o - | FileCheck %s --check-prefix=SPIRV-CHECK %}
 
 target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"