From 3265dfe3e620d526ca15dcecaa1c68e63ceaba45 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2024 14:22:36 -0700 Subject: [PATCH 01/47] [RISCV] Add signext attribute to return of fmv_x_w test in float-convert.ll. NFC This shows that Zfinx generates a sext.w instruction on RV64. The fadd.s should have filled the upper bits of the GPR with sign bits so this is unnecessary. Proving it is unnecessary might be difficult though. --- llvm/test/CodeGen/RISCV/float-convert.ll | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index 21bf6618c52a26..805ddee4ac3f6f 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -336,17 +336,23 @@ start: } declare i32 @llvm.fptoui.sat.i32.f32(float) -define i32 @fmv_x_w(float %a, float %b) nounwind { +define signext i32 @fmv_x_w(float %a, float %b) nounwind { ; CHECKIF-LABEL: fmv_x_w: ; CHECKIF: # %bb.0: ; CHECKIF-NEXT: fadd.s fa5, fa0, fa1 ; CHECKIF-NEXT: fmv.x.w a0, fa5 ; CHECKIF-NEXT: ret ; -; CHECKIZFINX-LABEL: fmv_x_w: -; CHECKIZFINX: # %bb.0: -; CHECKIZFINX-NEXT: fadd.s a0, a0, a1 -; CHECKIZFINX-NEXT: ret +; RV32IZFINX-LABEL: fmv_x_w: +; RV32IZFINX: # %bb.0: +; RV32IZFINX-NEXT: fadd.s a0, a0, a1 +; RV32IZFINX-NEXT: ret +; +; RV64IZFINX-LABEL: fmv_x_w: +; RV64IZFINX: # %bb.0: +; RV64IZFINX-NEXT: fadd.s a0, a0, a1 +; RV64IZFINX-NEXT: sext.w a0, a0 +; RV64IZFINX-NEXT: ret ; ; RV32I-LABEL: fmv_x_w: ; RV32I: # %bb.0: @@ -362,6 +368,7 @@ define i32 @fmv_x_w(float %a, float %b) nounwind { ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: sext.w a0, a0 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret From 4bac8fd8904904bc7d502f39851eef50b5afff73 Mon Sep 17 00:00:00 2001 From: Connie <60797237+connieyzhu@users.noreply.github.com> Date: Wed, 
14 Aug 2024 14:45:02 -0700 Subject: [PATCH 02/47] [llvm-lit][test][NFC] Moved cat command tests into separate lit test file (#102366) This patch separates the lit tests that check for the functionality of lit's built-in cat command into its own test file and folder. This is a prerequisite for https://github.com/llvm/llvm-project/pull/101530. --- .../Inputs/{shtest-shell => }/check_path.py | 0 .../cat-error-0.txt | 0 .../cat-error-1.txt | 0 .../utils/lit/tests/Inputs/shtest-cat/cat.txt | 83 ++++++++++++++ .../cat_nonprinting.bin | Bin .../utils/lit/tests/Inputs/shtest-cat/lit.cfg | 8 ++ .../tests/Inputs/shtest-shell/valid-shell.txt | 108 ++---------------- llvm/utils/lit/tests/shtest-cat.py | 23 ++++ llvm/utils/lit/tests/shtest-shell.py | 18 +-- 9 files changed, 127 insertions(+), 113 deletions(-) rename llvm/utils/lit/tests/Inputs/{shtest-shell => }/check_path.py (100%) rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat-error-0.txt (100%) rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat-error-1.txt (100%) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt rename llvm/utils/lit/tests/Inputs/{shtest-shell => shtest-cat}/cat_nonprinting.bin (100%) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg create mode 100644 llvm/utils/lit/tests/shtest-cat.py diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py b/llvm/utils/lit/tests/Inputs/check_path.py similarity index 100% rename from llvm/utils/lit/tests/Inputs/shtest-shell/check_path.py rename to llvm/utils/lit/tests/Inputs/check_path.py diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt similarity index 100% rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-0.txt rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-0.txt diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt 
similarity index 100% rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat-error-1.txt rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat-error-1.txt diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt new file mode 100644 index 00000000000000..7375a7497e5bec --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-cat/cat.txt @@ -0,0 +1,83 @@ +## Test cat command with a single file. +# +# RUN: rm -rf %T/testCat +# RUN: mkdir -p %T/testCat +# RUN: echo "abcdefgh" > %T/testCat/temp.write +# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write +# RUN: %{python} %S/../check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out +# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s +# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s +# FILE-EXISTS: True +# CAT-OUTPUT: abcdefgh +# +## Test cat command with multiple files. +# +# RUN: rm -rf %T/testCat +# RUN: mkdir -p %T/testCat +# RUN: echo "abcdefgh" > %T/testCat/temp1.write +# RUN: echo "efghijkl" > %T/testCat/temp2.write +# RUN: echo "mnopqrst" > %T/testCat/temp3.write +# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write +# RUN: %{python} %S/../check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out +# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s +# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s +# MULTI-FILE-EXISTS: True +# MULTI-CAT-OUTPUT: abcdefgh +# MULTI-CAT-OUTPUT-NEXT: efghijkl +# MULTI-CAT-OUTPUT-NEXT: mnopqrst +# +## Test cat command with multiple files and piped output to FileCheck. 
+# +# RUN: rm -rf %T/testCat +# RUN: mkdir -p %T/testCat +# RUN: echo "abcdefgh" > %T/testCat/temp1.write +# RUN: echo "efghijkl" > %T/testCat/temp2.write +# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s +# PIPED-CAT-OUTPUT: abcdefgh +# PIPED-CAT-OUTPUT-NEXT: efghijkl +# +## Test cat command with multiple files and glob expressions. +# +# RUN: rm -rf %T/testCat +# RUN: mkdir -p %T/testCat +# RUN: echo "cvbnm" > %T/testCat/temp1.write +# RUN: echo "qwerty" > %T/testCat/temp2.write +# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s +# GLOB-CAT-OUTPUT: cvbnm +# GLOB-CAT-OUTPUT-NEXT: qwerty +# +## Test cat command with -v option +# +# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s +# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G ^H +# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S +# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&' +# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:; +# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO +# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc +# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw +# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K +# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_ +# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3 +# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G +# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[ +# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o +# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^? 
+# +## Test cat command with -show-nonprinting option +# +# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s +# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G ^H +# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S +# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&' +# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:; +# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO +# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc +# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw +# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K +# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_ +# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3 +# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G +# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[ +# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o +# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^? 
diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin b/llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin similarity index 100% rename from llvm/utils/lit/tests/Inputs/shtest-shell/cat_nonprinting.bin rename to llvm/utils/lit/tests/Inputs/shtest-cat/cat_nonprinting.bin diff --git a/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg new file mode 100644 index 00000000000000..8f197946e28b5c --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-cat/lit.cfg @@ -0,0 +1,8 @@ +import lit.formats + +config.name = "shtest-cat" +config.suffixes = [".txt"] +config.test_format = lit.formats.ShTest() +config.test_source_root = None +config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) diff --git a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt index 7267b9b9ef5aba..75ce8b7733ad7d 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-shell/valid-shell.txt @@ -2,13 +2,13 @@ # Check force remove commands success whether the file does or doesn't exist. # # RUN: rm -f %t.write -# RUN: %{python} %S/check_path.py file %t.write > %t.out +# RUN: %{python} %S/../check_path.py file %t.write > %t.out # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s # RUN: echo "create a temp file" > %t.write -# RUN: %{python} %S/check_path.py file %t.write > %t.out +# RUN: %{python} %S/../check_path.py file %t.write > %t.out # RUN: FileCheck --check-prefix=FILE-EXIST < %t.out %s # RUN: rm -f %t.write -# RUN: %{python} %S/check_path.py file %t.write > %t.out +# RUN: %{python} %S/../check_path.py file %t.write > %t.out # RUN: FileCheck --check-prefix=REMOVE-FILE < %t.out %s # # REMOVE-FILE: False @@ -19,14 +19,14 @@ # # Check the mkdir command with -p option. 
# RUN: rm -f -r %T/test -# RUN: %{python} %S/check_path.py dir %T/test > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test > %t.out # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s # RUN: mkdir -p %T/test -# RUN: %{python} %S/check_path.py dir %T/test > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test > %t.out # RUN: FileCheck --check-prefix=MAKE-PARENT-DIR < %t.out %s # RUN: rm -f %T/test || true # RUN: rm -f -r %T/test -# RUN: %{python} %S/check_path.py dir %T/test > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test > %t.out # RUN: FileCheck --check-prefix=REMOVE-PARENT-DIR < %t.out %s # # MAKE-PARENT-DIR: True @@ -36,13 +36,13 @@ # # RUN: rm -rf %T/test1 # RUN: mkdir %T/test1 -# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s # RUN: cd %T/test1 && mkdir foo -# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out # RUN: FileCheck --check-prefix=MAKE-DIR < %t.out %s # RUN: cd %T && rm -rf %T/test1 -# RUN: %{python} %S/check_path.py dir %T/test1 > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test1 > %t.out # RUN: FileCheck --check-prefix=REMOVE-DIR < %t.out %s # # MAKE-DIR: True @@ -52,16 +52,16 @@ # # RUN: rm -rf %T/test # RUN: mkdir -p %T/test/test1 %T/test/test2 -# RUN: %{python} %S/check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test %T/test/test1 %T/test/test2 > %t.out # RUN: FileCheck --check-prefix=DIRS-EXIST < %t.out %s # RUN: mkdir %T/test || true # RUN: echo "create a temp file" > %T/test/temp.write # RUN: echo "create a temp1 file" > %T/test/test1/temp1.write # RUN: echo "create a temp2 file" > %T/test/test2/temp2.write -# RUN: %{python} %S/check_path.py file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out +# RUN: %{python} %S/../check_path.py 
file %T/test/temp.write %T/test/test1/temp1.write %T/test/test2/temp2.write> %t.out # RUN: FileCheck --check-prefix=FILES-EXIST < %t.out %s # RUN: rm -r -f %T/* -# RUN: %{python} %S/check_path.py dir %T/test > %t.out +# RUN: %{python} %S/../check_path.py dir %T/test > %t.out # RUN: FileCheck --check-prefix=REMOVE-ALL < %t.out %s # # DIRS-EXIST: True @@ -85,87 +85,3 @@ # RUN: cd %T/dir1 && echo "hello" > temp1.txt # RUN: cd %T/dir2 && echo "hello" > temp2.txt # RUN: diff temp2.txt ../dir1/temp1.txt -# -# Check cat command with single file. -# -# RUN: rm -rf %T/testCat -# RUN: mkdir -p %T/testCat -# RUN: echo "abcdefgh" > %T/testCat/temp.write -# RUN: cat %T/testCat/temp.write > %T/testCat/tempcat.write -# RUN: %{python} %S/check_path.py file %T/testCat/tempcat.write > %T/testCat/path.out -# RUN: FileCheck --check-prefix=FILE-EXISTS < %T/testCat/path.out %s -# RUN: FileCheck --check-prefix=CAT-OUTPUT < %T/testCat/tempcat.write %s -# FILE-EXISTS: True -# CAT-OUTPUT: abcdefgh -# -# Check cat command with multiple files. -# -# RUN: rm -rf %T/testCat -# RUN: mkdir -p %T/testCat -# RUN: echo "abcdefgh" > %T/testCat/temp1.write -# RUN: echo "efghijkl" > %T/testCat/temp2.write -# RUN: echo "mnopqrst" > %T/testCat/temp3.write -# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write %T/testCat/temp3.write > %T/testCat/tempmulticat.write -# RUN: %{python} %S/check_path.py file %T/testCat/tempmulticat.write > %T/testCat/path.out -# RUN: FileCheck --check-prefix=MULTI-FILE-EXISTS < %T/testCat/path.out %s -# RUN: FileCheck --check-prefix=MULTI-CAT-OUTPUT < %T/testCat/tempmulticat.write %s -# MULTI-FILE-EXISTS: True -# MULTI-CAT-OUTPUT: abcdefgh -# MULTI-CAT-OUTPUT-NEXT: efghijkl -# MULTI-CAT-OUTPUT-NEXT: mnopqrst -# -# Check cat command with multiple files and piped output to FileCheck. 
-# -# RUN: rm -rf %T/testCat -# RUN: mkdir -p %T/testCat -# RUN: echo "abcdefgh" > %T/testCat/temp1.write -# RUN: echo "efghijkl" > %T/testCat/temp2.write -# RUN: cat %T/testCat/temp1.write %T/testCat/temp2.write | FileCheck --check-prefix=PIPED-CAT-OUTPUT %s -# PIPED-CAT-OUTPUT: abcdefgh -# PIPED-CAT-OUTPUT-NEXT: efghijkl -# -# Check cat command with multiple files and glob expressions. -# -# RUN: rm -rf %T/testCat -# RUN: mkdir -p %T/testCat -# RUN: echo "cvbnm" > %T/testCat/temp1.write -# RUN: echo "qwerty" > %T/testCat/temp2.write -# RUN: cat %T/testCat/*.write | FileCheck --check-prefix=GLOB-CAT-OUTPUT %s -# GLOB-CAT-OUTPUT: cvbnm -# GLOB-CAT-OUTPUT-NEXT: qwerty -# -# Check cat command with -v option -# -# RUN: cat -v %S/cat_nonprinting.bin | FileCheck --check-prefix=NP-CAT-OUTPUT %s -# NP-CAT-OUTPUT: ^@^A^B^C^D^E^F^G ^H -# NP-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S -# NP-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&' -# NP-CAT-OUTPUT-NEXT: ()*+,-./0123456789:; -# NP-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO -# NP-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc -# NP-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw -# NP-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K -# NP-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_ -# NP-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3 -# NP-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G -# NP-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[ -# NP-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o -# NP-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^? 
-# -# Check cat command with -show-nonprinting option -# -# RUN: cat --show-nonprinting %S/cat_nonprinting.bin | FileCheck --check-prefix=NPLONG-CAT-OUTPUT %s -# NPLONG-CAT-OUTPUT: ^@^A^B^C^D^E^F^G ^H -# NPLONG-CAT-OUTPUT-NEXT: ^K^L^M^N^O^P^Q^R^S -# NPLONG-CAT-OUTPUT-NEXT: ^T^U^V^W^X^Y^Z^[^\^]^^^_ !"#$%&' -# NPLONG-CAT-OUTPUT-NEXT: ()*+,-./0123456789:; -# NPLONG-CAT-OUTPUT-NEXT: <=>?@ABCDEFGHIJKLMNO -# NPLONG-CAT-OUTPUT-NEXT: PQRSTUVWXYZ[\]^_`abc -# NPLONG-CAT-OUTPUT-NEXT: defghijklmnopqrstuvw -# NPLONG-CAT-OUTPUT-NEXT: xyz{|}~^?M-^@M-^AM-^BM-^CM-^DM-^EM-^FM-^GM-^HM-^IM-^JM-^K -# NPLONG-CAT-OUTPUT-NEXT: M-^LM-^MM-^NM-^OM-^PM-^QM-^RM-^SM-^TM-^UM-^VM-^WM-^XM-^YM-^ZM-^[M-^\M-^]M-^^M-^_ -# NPLONG-CAT-OUTPUT-NEXT: M- M-!M-"M-#M-$M-%M-&M-'M-(M-)M-*M-+M-,M--M-.M-/M-0M-1M-2M-3 -# NPLONG-CAT-OUTPUT-NEXT: M-4M-5M-6M-7M-8M-9M-:M-;M-<M-=M->M-?M-@M-AM-BM-CM-DM-EM-FM-G -# NPLONG-CAT-OUTPUT-NEXT: M-HM-IM-JM-KM-LM-MM-NM-OM-PM-QM-RM-SM-TM-UM-VM-WM-XM-YM-ZM-[ -# NPLONG-CAT-OUTPUT-NEXT: M-\M-]M-^M-_M-`M-aM-bM-cM-dM-eM-fM-gM-hM-iM-jM-kM-lM-mM-nM-o -# NPLONG-CAT-OUTPUT-NEXT: M-pM-qM-rM-sM-tM-uM-vM-wM-xM-yM-zM-{M-|M-}M-~M-^? diff --git a/llvm/utils/lit/tests/shtest-cat.py b/llvm/utils/lit/tests/shtest-cat.py new file mode 100644 index 00000000000000..5efe25c41684a1 --- /dev/null +++ b/llvm/utils/lit/tests/shtest-cat.py @@ -0,0 +1,23 @@ +## Test the cat command. +# +# RUN: not %{lit} -a -v %{inputs}/shtest-cat \ +# RUN: | FileCheck -match-full-lines %s +# END. 
+ +# CHECK: FAIL: shtest-cat :: cat-error-0.txt ({{[^)]*}}) +# CHECK: cat -b temp1.txt +# CHECK: # .---command stderr{{-*}} +# CHECK-NEXT: # | Unsupported: 'cat': option -b not recognized +# CHECK: # error: command failed with exit status: 1 + +# CHECK: FAIL: shtest-cat :: cat-error-1.txt ({{[^)]*}}) +# CHECK: cat temp1.txt +# CHECK: # .---command stderr{{-*}} +# CHECK-NEXT: # | [Errno 2] No such file or directory: 'temp1.txt' +# CHECK: # error: command failed with exit status: 1 + +# CHECK: PASS: shtest-cat :: cat.txt ({{[^)]*}}) + +# CHECK: Total Discovered Tests: 3 +# CHECK-NEXT: Passed: 1 {{\([0-9]*\.[0-9]*%\)}} +# CHECK-NEXT: Failed: 2 {{\([0-9]*\.[0-9]*%\)}} diff --git a/llvm/utils/lit/tests/shtest-shell.py b/llvm/utils/lit/tests/shtest-shell.py index 86851194880620..8f2b865f333a57 100644 --- a/llvm/utils/lit/tests/shtest-shell.py +++ b/llvm/utils/lit/tests/shtest-shell.py @@ -18,22 +18,6 @@ # CHECK: -- Testing: -# CHECK: FAIL: shtest-shell :: cat-error-0.txt -# CHECK: *** TEST 'shtest-shell :: cat-error-0.txt' FAILED *** -# CHECK: cat -b temp1.txt -# CHECK: # .---command stderr{{-*}} -# CHECK: # | Unsupported: 'cat': option -b not recognized -# CHECK: # error: command failed with exit status: 1 -# CHECK: *** - -# CHECK: FAIL: shtest-shell :: cat-error-1.txt -# CHECK: *** TEST 'shtest-shell :: cat-error-1.txt' FAILED *** -# CHECK: cat temp1.txt -# CHECK: # .---command stderr{{-*}} -# CHECK: # | [Errno 2] No such file or directory: 'temp1.txt' -# CHECK: # error: command failed with exit status: 1 -# CHECK: *** - # CHECK: FAIL: shtest-shell :: colon-error.txt # CHECK: *** TEST 'shtest-shell :: colon-error.txt' FAILED *** # CHECK: : @@ -651,4 +635,4 @@ # CHECK: PASS: shtest-shell :: valid-shell.txt # CHECK: Unresolved Tests (1) -# CHECK: Failed Tests (38) +# CHECK: Failed Tests (36) From e9b7983fc6826eceb819a3cdb0301c401847ade4 Mon Sep 17 00:00:00 2001 From: Harini0924 <79345568+Harini0924@users.noreply.github.com> Date: Wed, 14 Aug 2024 14:49:19 -0700 Subject: 
[PATCH 03/47] [llvm-lit] Fix Unhashable TypeError when using lit's internal shell (#101590) When using the lit internal shell with the command: ``` LIT_USE_INTERNAL_SHELL=1 ninja check-compiler-rt ``` The following error is encountered: ``` File "TestRunner.py", line 770, in _executeShCmd inproc_builtin = inproc_builtins.get(args[0], None) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TypeError: unhashable type: 'GlobItem' ``` This error is in a compiler-rt file: ``` TestCases/Linux/long-object-path.cpp ``` This error occurs because `args[0]` is of type `GlobItem`, which is not hashable, leading to a `TypeError` when it is passed to `inproc_builtins.get()`. To resolve this issue, I have updated the implementation to ensure that `args[0]` is hashable before it is used in `inproc_builtins`. fixes: #102389 [link to RFC](https://discourse.llvm.org/t/rfc-enabling-the-lit-internal-shell-by-default/80179) --- llvm/utils/lit/lit/TestRunner.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index da7fa86fd39173..cc903f9e3a1520 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -767,6 +767,10 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): # echo-appending to a file. # FIXME: Standardize on the builtin echo implementation. We can use a # temporary file to sidestep blocking pipe write issues. + + # Ensure args[0] is hashable. + args[0] = expand_glob(args[0], cmd_shenv.cwd)[0] + inproc_builtin = inproc_builtins.get(args[0], None) if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1): # env calling an in-process builtin is useless, so we take the safe From a88f3a331137d6379f2f1189d5eb4b086c686ab4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 14 Aug 2024 16:54:38 -0500 Subject: [PATCH 04/47] [Clang] Add `__CLANG_GPU_DISABLE_MATH_WRAPPERS` macro for offloading math (#98234) Summary: Currently we replace all math calls with vendor specific ones. 
This patch introduces a macro `__CLANG_GPU_DISABLE_MATH_WRAPPERS` that when defined will disable this. I went this route instead of a flag for two reasons. One, I think we have too many flags as is, and we already have `-nogpuinc` to cover disabling these wrappers entirely, so this would be a really specific subset of that. Second, these math headers aren't easily decoupled by simply not including a single header from the clang driver layer. There's the cmath and the regular math forward declares it would disable as well. Note, this currently causes errors because the GPU `libm` doesn't have `powi`, that's an NVIDIA extension I'll add to LLVM libm. --- clang/lib/Headers/__clang_cuda_math.h | 5 +++ clang/lib/Headers/__clang_hip_math.h | 5 +++ clang/test/Headers/gpu_disabled_math.cpp | 41 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 clang/test/Headers/gpu_disabled_math.cpp diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h index 04019165068668..44c6e9a4e48d1b 100644 --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -12,6 +12,10 @@ #error "This file is for CUDA compilation only." #endif +// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard +// libcalls reach the link step instead of being eagerly replaced. +#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS + #ifndef __OPENMP_NVPTX__ #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. 
@@ -345,4 +349,5 @@ __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } #pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") +#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS #endif // __CLANG_CUDA_MATH_H__ diff --git a/clang/lib/Headers/__clang_hip_math.h b/clang/lib/Headers/__clang_hip_math.h index 11e1e7d032586f..8468751d9de260 100644 --- a/clang/lib/Headers/__clang_hip_math.h +++ b/clang/lib/Headers/__clang_hip_math.h @@ -13,6 +13,10 @@ #error "This file is for HIP and OpenMP AMDGCN device compilation only." #endif +// The __CLANG_GPU_DISABLE_MATH_WRAPPERS macro provides a way to let standard +// libcalls reach the link step instead of being eagerly replaced. +#ifndef __CLANG_GPU_DISABLE_MATH_WRAPPERS + #if !defined(__HIPCC_RTC__) #include <limits.h> #include <stdint.h> @@ -1321,4 +1325,5 @@ __host__ inline static int max(int __arg1, int __arg2) { #pragma pop_macro("__RETURN_TYPE") #pragma pop_macro("__FAST_OR_SLOW") +#endif // __CLANG_GPU_DISABLE_MATH_WRAPPERS #endif // __CLANG_HIP_MATH_H__ diff --git a/clang/test/Headers/gpu_disabled_math.cpp b/clang/test/Headers/gpu_disabled_math.cpp new file mode 100644 index 00000000000000..6e697f52120aeb --- /dev/null +++ b/clang/test/Headers/gpu_disabled_math.cpp @@ -0,0 +1,41 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=AMDGPU %s + +// RUN: %clang_cc1 -include __clang_cuda_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -triple nvptx64-nvidia-cuda -aux-triple x86_64-unknown-unknown \ 
+// RUN: -target-cpu sm_90 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D __CLANG_GPU_DISABLE_MATH_WRAPPERS | FileCheck -check-prefix=NVPTX %s + +extern "C" double sin(double x); + +// AMDGPU-LABEL: define dso_local noundef double @_Z3food( +// AMDGPU-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// AMDGPU-NEXT: [[ENTRY:.*:]] +// AMDGPU-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5) +// AMDGPU-NEXT: [[X_ADDR:%.*]] = alloca double, align 8, addrspace(5) +// AMDGPU-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// AMDGPU-NEXT: [[X_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[X_ADDR]] to ptr +// AMDGPU-NEXT: store double [[X]], ptr [[X_ADDR_ASCAST]], align 8 +// AMDGPU-NEXT: [[TMP0:%.*]] = load double, ptr [[X_ADDR_ASCAST]], align 8 +// AMDGPU-NEXT: [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]]) +// AMDGPU-NEXT: ret double [[TMP1]] +// +// NVPTX-LABEL: define dso_local noundef double @_Z3food( +// NVPTX-SAME: double noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// NVPTX-NEXT: [[ENTRY:.*:]] +// NVPTX-NEXT: [[X_ADDR:%.*]] = alloca double, align 8 +// NVPTX-NEXT: store double [[X]], ptr [[X_ADDR]], align 8 +// NVPTX-NEXT: [[TMP0:%.*]] = load double, ptr [[X_ADDR]], align 8 +// NVPTX-NEXT: [[TMP1:%.*]] = call double @llvm.sin.f64(double [[TMP0]]) +// NVPTX-NEXT: ret double [[TMP1]] +// +double foo(double x) { + return sin(x); +} From 743e99dcf5146dd4e2c20d20800e91595da47be9 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 13 Aug 2024 15:14:11 -0700 Subject: [PATCH 05/47] Reland "[flang][cuda] Use cuda runtime API #103488" CUDA Fortran is meant to be an equivalent to the runtime API. Therefore, it makes more sense to use the cuda rt API in the allocators for CUF. 
--- flang/include/flang/Runtime/CUDA/allocator.h | 7 ++-- flang/runtime/CUDA/CMakeLists.txt | 10 ++++-- flang/runtime/CUDA/allocator.cpp | 24 ++++++-------- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 33 +------------------ 4 files changed, 22 insertions(+), 52 deletions(-) diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h index f0bfc1548e6458..4527c9f18fa054 100644 --- a/flang/include/flang/Runtime/CUDA/allocator.h +++ b/flang/include/flang/Runtime/CUDA/allocator.h @@ -13,11 +13,10 @@ #include "flang/Runtime/entry-names.h" #define CUDA_REPORT_IF_ERROR(expr) \ - [](CUresult result) { \ - if (!result) \ + [](cudaError_t err) { \ + if (err == cudaSuccess) \ return; \ - const char *name = nullptr; \ - cuGetErrorName(result, &name); \ + const char *name = cudaGetErrorName(err); \ if (!name) \ name = "<unknown>"; \ Terminator terminator{__FILE__, __LINE__}; \ diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt index 88243536139e46..c9a20ebcc82e07 100644 --- a/flang/runtime/CUDA/CMakeLists.txt +++ b/flang/runtime/CUDA/CMakeLists.txt @@ -7,14 +7,20 @@ #===------------------------------------------------------------------------===# include_directories(${CUDAToolkit_INCLUDE_DIRS}) -find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) add_flang_library(CufRuntime allocator.cpp descriptor.cpp ) + +if (BUILD_SHARED_LIBS) + set(CUDA_RT_TARGET CUDA::cudart) +else() + set(CUDA_RT_TARGET CUDA::cudart_static) +endif() + target_link_libraries(CufRuntime PRIVATE FortranRuntime - ${CUDA_RUNTIME_LIBRARY} + ${CUDA_RT_TARGET} ) diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp index bd657b800c61e8..d4a473d58e86cd 100644 --- a/flang/runtime/CUDA/allocator.cpp +++ b/flang/runtime/CUDA/allocator.cpp @@ -15,7 +15,7 @@ #include "flang/ISO_Fortran_binding_wrapper.h" #include "flang/Runtime/allocator-registry.h" -#include "cuda.h" +#include 
"cuda_runtime.h" namespace Fortran::runtime::cuda { extern "C" { @@ -34,32 +34,28 @@ void RTDEF(CUFRegisterAllocator)() { void *CUFAllocPinned(std::size_t sizeInBytes) { void *p; - CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes)); + CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes)); return p; } -void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); } +void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); } void *CUFAllocDevice(std::size_t sizeInBytes) { - CUdeviceptr p = 0; - CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes)); - return reinterpret_cast<void *>(p); + void *p; + CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); + return p; } -void CUFFreeDevice(void *p) { - CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); -} +void CUFFreeDevice(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } void *CUFAllocManaged(std::size_t sizeInBytes) { - CUdeviceptr p = 0; + void *p; CUDA_REPORT_IF_ERROR( - cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL)); + cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); return reinterpret_cast<void *>(p); } -void CUFFreeManaged(void *p) { - CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p))); -} +void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } void *CUFAllocUnified(std::size_t sizeInBytes) { // Call alloc managed for the time being. 
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp index 9f5ec289ee8f74..b51ff0ac006cc6 100644 --- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -14,7 +14,7 @@ #include "flang/Runtime/allocatable.h" #include "flang/Runtime/allocator-registry.h" -#include "cuda.h" +#include "cuda_runtime.h" using namespace Fortran::runtime; using namespace Fortran::runtime::cuda; @@ -25,38 +25,9 @@ static OwningPtr<Descriptor> createAllocatable( CFI_attribute_allocatable); } -thread_local static int32_t defaultDevice = 0; - -CUdevice getDefaultCuDevice() { - CUdevice device; - CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice)); - return device; -} - -class ScopedContext { -public: - ScopedContext() { - // Static reference to CUDA primary context for device ordinal - // defaultDevice. - static CUcontext context = [] { - CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0)); - CUcontext ctx; - // Note: this does not affect the current context. 
- CUDA_REPORT_IF_ERROR( - cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice())); - return ctx; - }(); - - CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context)); - } - - ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); } -}; - TEST(AllocatableCUFTest, SimpleDeviceAllocate) { using Fortran::common::TypeCategory; RTNAME(CUFRegisterAllocator)(); - ScopedContext ctx; // REAL(4), DEVICE, ALLOCATABLE :: a(:) auto a{createAllocatable(TypeCategory::Real, 4)}; a->SetAllocIdx(kDeviceAllocatorPos); @@ -74,7 +45,6 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) { TEST(AllocatableCUFTest, SimplePinnedAllocate) { using Fortran::common::TypeCategory; RTNAME(CUFRegisterAllocator)(); - ScopedContext ctx; // INTEGER(4), PINNED, ALLOCATABLE :: a(:) auto a{createAllocatable(TypeCategory::Integer, 4)}; EXPECT_FALSE(a->HasAddendum()); @@ -93,7 +63,6 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) { TEST(AllocatableCUFTest, DescriptorAllocationTest) { using Fortran::common::TypeCategory; RTNAME(CUFRegisterAllocator)(); - ScopedContext ctx; // REAL(4), DEVICE, ALLOCATABLE :: a(:) auto a{createAllocatable(TypeCategory::Real, 4)}; Descriptor *desc = nullptr; From f1779ae53b5a8f65406648f1b69e3dd1ae0340b0 Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Wed, 14 Aug 2024 14:58:43 -0700 Subject: [PATCH 06/47] [bazel] Port 4bac8fd8904904bc7d502f39851eef50b5afff73 (#104278) --- .../llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel index b9e0a2e153ac1e..13f6f815d39950 100644 --- a/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/utils/lit/tests/BUILD.bazel @@ -33,5 +33,8 @@ expand_template( "//llvm:not", ] + glob(["Inputs/**"]), ) - for src in glob(["*/*.py"]) + for src in glob( + ["*/*.py"], + exclude = 
["Inputs/**"], + ) ] From 48809fafbc083a2e4c03f70406b712ff18b42554 Mon Sep 17 00:00:00 2001 From: Joshua Batista Date: Wed, 14 Aug 2024 15:11:34 -0700 Subject: [PATCH 07/47] Remove unused variable, and unneeded extract element instruction (#103489) This PR removes an unneeded extract element instruction from codegen, along with the variable that captured that instruction's return value. --- clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl | 2 +- llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp | 1 - llvm/test/CodeGen/DirectX/normalize.ll | 6 ------ 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl index c72c8b3c222b6b..fc48c9b2589f7e 100644 --- a/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/normalize-errors.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -emit-llvm -disable-llvm-passes -verify -verify-ignore-unexpected +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -fnative-half-type -disable-llvm-passes -verify -verify-ignore-unexpected void test_too_few_arg() { diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 626321f44c2bfc..e63633b8a1e1ab 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -252,7 +252,6 @@ static bool expandNormalizeIntrinsic(CallInst *Orig) { return true; } - Value *Elt = Builder.CreateExtractElement(X, (uint64_t)0); unsigned XVecSize = XVec->getNumElements(); Value *DotProduct = nullptr; // use the dot intrinsic corresponding to the vector size diff --git a/llvm/test/CodeGen/DirectX/normalize.ll b/llvm/test/CodeGen/DirectX/normalize.ll index f3533cc56e7c25..e2c8a5d4656a65 100644 --- a/llvm/test/CodeGen/DirectX/normalize.ll +++ 
b/llvm/test/CodeGen/DirectX/normalize.ll @@ -22,7 +22,6 @@ entry: define noundef <2 x half> @test_normalize_half2(<2 x half> noundef %p0) { entry: - ; CHECK: extractelement <2 x half> %{{.*}}, i64 0 ; EXPCHECK: [[doth2:%.*]] = call half @llvm.dx.dot2.v2f16(<2 x half> %{{.*}}, <2 x half> %{{.*}}) ; DOPCHECK: [[doth2:%.*]] = call half @dx.op.dot2.f16(i32 54, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth2]]) @@ -37,7 +36,6 @@ entry: define noundef <3 x half> @test_normalize_half3(<3 x half> noundef %p0) { entry: - ; CHECK: extractelement <3 x half> %{{.*}}, i64 0 ; EXPCHECK: [[doth3:%.*]] = call half @llvm.dx.dot3.v3f16(<3 x half> %{{.*}}, <3 x half> %{{.*}}) ; DOPCHECK: [[doth3:%.*]] = call half @dx.op.dot3.f16(i32 55, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth3]]) @@ -52,7 +50,6 @@ entry: define noundef <4 x half> @test_normalize_half4(<4 x half> noundef %p0) { entry: - ; CHECK: extractelement <4 x half> %{{.*}}, i64 0 ; EXPCHECK: [[doth4:%.*]] = call half @llvm.dx.dot4.v4f16(<4 x half> %{{.*}}, <4 x half> %{{.*}}) ; DOPCHECK: [[doth4:%.*]] = call half @dx.op.dot4.f16(i32 56, half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call half @llvm.dx.rsqrt.f16(half [[doth4]]) @@ -74,7 +71,6 @@ entry: define noundef <2 x float> @test_normalize_float2(<2 x float> noundef %p0) { entry: - ; CHECK: extractelement <2 x float> %{{.*}}, i64 0 ; EXPCHECK: [[dotf2:%.*]] = call float @llvm.dx.dot2.v2f32(<2 x float> %{{.*}}, <2 x float> %{{.*}}) ; DOPCHECK: [[dotf2:%.*]] = call float @dx.op.dot2.f32(i32 54, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf2]]) @@ -89,7 +85,6 @@ entry: define noundef <3 x float> @test_normalize_float3(<3 x float> noundef %p0) { entry: - ; CHECK: extractelement <3 x float> %{{.*}}, 
i64 0 ; EXPCHECK: [[dotf3:%.*]] = call float @llvm.dx.dot3.v3f32(<3 x float> %{{.*}}, <3 x float> %{{.*}}) ; DOPCHECK: [[dotf3:%.*]] = call float @dx.op.dot3.f32(i32 55, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf3]]) @@ -104,7 +99,6 @@ entry: define noundef <4 x float> @test_normalize_float4(<4 x float> noundef %p0) { entry: - ; CHECK: extractelement <4 x float> %{{.*}}, i64 0 ; EXPCHECK: [[dotf4:%.*]] = call float @llvm.dx.dot4.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}) ; DOPCHECK: [[dotf4:%.*]] = call float @dx.op.dot4.f32(i32 56, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}) ; EXPCHECK: [[rsqrt:%.*]] = call float @llvm.dx.rsqrt.f32(float [[dotf4]]) From 4f7ce107de0c3ae0fb5748f98bc696b6eec7aad9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 14 Aug 2024 14:51:05 -0700 Subject: [PATCH 08/47] [RISCV] Don't combine (sext_inreg (fmv_x_anyexth X), i16) with Zhinx. With Zfh and Zfhmin this combine creates a fmv_x_signexth node so we can remember that the result is sign extended. This becomes a fmv.x.h instruction which sign extends its result. With Zhinx, fmv_x_signexth becomes a COPY_TO_REGCLASS. In order for this to guarantee the result is properly sign extended we need all producers of a GPRF16 register class to guarantee the rest of the GPR is sign extended. I don't think we've done that. bitcasts from i16 to f16 definitely don't do it. The safest thing to do is to not do this combine so the sign_extend_inreg will emit a shift pair. This is also consistent with the code generated for Zfinx on RV64, where we don't assume the upper 32 bits are sign extended. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +++- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 1 - llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll | 2 ++ llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll | 2 ++ 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 2b14deb479bf6f..02f48d41b56b3c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -13843,8 +13843,10 @@ performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X) + // Don't do this with Zhinx. We need to explicitly sign extend the GPR. if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH && - cast(N->getOperand(1))->getVT().bitsGE(MVT::i16)) + cast(N->getOperand(1))->getVT().bitsGE(MVT::i16) && + Subtarget.hasStdExtZfhmin()) return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT, Src.getOperand(0)); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 85715ca9145c35..abdd366741eb04 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -458,7 +458,6 @@ def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1, FRM_RNE)>; // Moves (no conversion) def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>; def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>; -def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>; def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2, FRM_RNE))>; } // Predicates = [HasStdExtZhinxmin] diff --git a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll index 08dcefa0464030..9aec4dea63b9d2 100644 --- 
a/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64zfh-half-convert.ll @@ -123,6 +123,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind { ; RV64IZHINX-LABEL: bcvt_f16_to_sext_i16: ; RV64IZHINX: # %bb.0: ; RV64IZHINX-NEXT: fadd.h a0, a0, a1 +; RV64IZHINX-NEXT: slli a0, a0, 48 +; RV64IZHINX-NEXT: srai a0, a0, 48 ; RV64IZHINX-NEXT: ret %1 = fadd half %a, %b %2 = bitcast half %1 to i16 diff --git a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll index f867fe46f0ec33..aac1a65e6c4fec 100644 --- a/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64zfhmin-half-convert.ll @@ -144,6 +144,8 @@ define signext i16 @bcvt_f16_to_sext_i16(half %a, half %b) nounwind { ; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 ; RV64IZHINXMIN-NEXT: fadd.s a0, a0, a1 ; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: slli a0, a0, 48 +; RV64IZHINXMIN-NEXT: srai a0, a0, 48 ; RV64IZHINXMIN-NEXT: ret %1 = fadd half %a, %b %2 = bitcast half %1 to i16 From 539bf499615dbbfe98deaac1021f351eaad330ea Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Wed, 14 Aug 2024 22:29:07 +0000 Subject: [PATCH 09/47] [Sanitizers] Disable prctl test on Android. 
--- compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp index f33aa2004db79e..d0be7f4fa87899 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/prctl.cpp @@ -1,5 +1,7 @@ // RUN: %clangxx %s -o %t && %run %t %p +// UNSUPPORTED: android + #include #include #include From 2b959bd7f21bc7550a99fb160997002b7e4f1b62 Mon Sep 17 00:00:00 2001 From: cor3ntin Date: Thu, 15 Aug 2024 00:55:54 +0200 Subject: [PATCH 10/47] [Clang] Error on extraneous template headers by default. (#104046) As discussed here https://github.com/llvm/llvm-project/issues/99296#issuecomment-2240807413 Fixes #99296 Fixes #50294 --- clang/docs/ReleaseNotes.rst | 9 +++++++++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 3 ++- clang/test/Misc/warning-flags.c | 3 +-- clang/test/SemaTemplate/temp_explicit.cpp | 7 ++++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1541b0cbf4875c..7c4451d93394c3 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -65,6 +65,15 @@ C++ Specific Potentially Breaking Changes `-Wno-enum-constexpr-conversion`, to allow for a transition period for users. Now, in Clang 20, **it is no longer possible to suppress the diagnostic**. +- Extraneous template headers are now ill-formed by default. + This error can be disabled with ``-Wno-error=extraneous-template-head``. + + .. 
code-block:: c++ + + template <> // error: extraneous template head + template + void f(); + ABI Changes in This Version --------------------------- diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c710c3360be7da..da2f939067bfab 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5428,7 +5428,8 @@ def err_template_spec_extra_headers : Error< "extraneous template parameter list in template specialization or " "out-of-line template definition">; def ext_template_spec_extra_headers : ExtWarn< - "extraneous template parameter list in template specialization">; + "extraneous template parameter list in template specialization">, + InGroup>, DefaultError; def note_explicit_template_spec_does_not_need_header : Note< "'template<>' header not required for explicitly-specialized class %0 " "declared here">; diff --git a/clang/test/Misc/warning-flags.c b/clang/test/Misc/warning-flags.c index cdbe1e95cba965..35543e6a49ffda 100644 --- a/clang/test/Misc/warning-flags.c +++ b/clang/test/Misc/warning-flags.c @@ -18,14 +18,13 @@ This test serves two purposes: The list of warnings below should NEVER grow. It should gradually shrink to 0. 
-CHECK: Warnings without flags (65): +CHECK: Warnings without flags (64): CHECK-NEXT: ext_expected_semi_decl_list CHECK-NEXT: ext_missing_whitespace_after_macro_name CHECK-NEXT: ext_new_paren_array_nonconst CHECK-NEXT: ext_plain_complex CHECK-NEXT: ext_template_arg_extra_parens -CHECK-NEXT: ext_template_spec_extra_headers CHECK-NEXT: ext_typecheck_cond_incompatible_operands CHECK-NEXT: ext_typecheck_ordered_comparison_of_pointer_integer CHECK-NEXT: ext_using_undefined_std diff --git a/clang/test/SemaTemplate/temp_explicit.cpp b/clang/test/SemaTemplate/temp_explicit.cpp index 0bb0cfad61fdb0..4612e4a57e90e0 100644 --- a/clang/test/SemaTemplate/temp_explicit.cpp +++ b/clang/test/SemaTemplate/temp_explicit.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat %s // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -Wc++11-compat -std=c++98 %s // RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++11 %s +// RUN: %clang_cc1 -fsyntax-only -verify -pedantic -std=c++20 %s // // Tests explicit instantiation of templates. template class X0 { }; @@ -128,11 +129,15 @@ struct Foo // expected-note{{header not required for explicitly-specialized {}; }; -template <> // expected-warning{{extraneous template parameter list}} +template <> // expected-error{{extraneous template parameter list}} template <> struct Foo::Bar {}; +#if __cplusplus >= 202002L +template<> void f(auto); // expected-error{{extraneous template parameter list}} +#endif + namespace N1 { template struct X7 { }; // expected-note{{here}} From 9a666deecb9ff6ca3a6b12e6c2877e19b74b54da Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Wed, 14 Aug 2024 17:17:06 -0700 Subject: [PATCH 11/47] [Clang] Overflow Pattern Exclusions (#100272) Introduce "-fsanitize-overflow-pattern-exclusion=" which can be used to disable sanitizer instrumentation for common overflow-dependent code patterns. 
For a wide selection of projects, proper overflow sanitization could help catch bugs and solve security vulnerabilities. Unfortunately, in some cases the integer overflow sanitizers are too noisy for their users and are often left disabled. Providing users with a method to disable sanitizer instrumentation of common patterns could mean more projects actually utilize the sanitizers in the first place. One such project that has opted to not use integer overflow (or truncation) sanitizers is the Linux Kernel. There has been some discussion[1] recently concerning mitigation strategies for unexpected arithmetic overflow. This discussion is still ongoing and a succinct article[2] accurately sums up the discussion. In summary, many Kernel developers do not want to introduce more arithmetic wrappers when most developers understand the code patterns as they are. Patterns like: if (base + offset < base) { ... } or while (i--) { ... } or #define SOME -1UL are extremely common in a code base like the Linux Kernel. It is perhaps too much to ask of kernel developers to use arithmetic wrappers in these cases. For example: while (wrapping_post_dec(i)) { ... } which wraps some builtin would not fly. This would incur too many changes to existing code; the code churn would be too much, at least too much to justify turning on overflow sanitizers. Currently, this commit tackles three pervasive idioms: 1. "if (a + b < a)" or some logically-equivalent re-ordering like "if (a > b + a)" 2. "while (i--)" (for unsigned) a post-decrement always overflows here 3. "-1UL, -2UL, etc" negation of unsigned constants will always overflow The patterns that are excluded can be chosen from the following list: - add-overflow-test - post-decr-while - negated-unsigned-const These can be enabled with a comma-separated list: -fsanitize-overflow-pattern-exclusion=add-overflow-test,negated-unsigned-const "all" or "none" may also be used to specify that all patterns should be excluded or that none should be. 
[1] https://lore.kernel.org/all/202404291502.612E0A10@keescook/ [2] https://lwn.net/Articles/979747/ CCs: @efriedma-quic @kees @jyknight @fmayer @vitalybuka Signed-off-by: Justin Stitt Co-authored-by: Bill Wendling --- clang/docs/ReleaseNotes.rst | 30 ++++ clang/docs/UndefinedBehaviorSanitizer.rst | 42 +++++ clang/include/clang/AST/Expr.h | 9 ++ clang/include/clang/AST/Stmt.h | 5 + clang/include/clang/Basic/LangOptions.def | 2 + clang/include/clang/Basic/LangOptions.h | 28 ++++ clang/include/clang/Driver/Options.td | 5 + clang/include/clang/Driver/SanitizerArgs.h | 1 + clang/lib/AST/Expr.cpp | 54 +++++++ clang/lib/CodeGen/CGExprScalar.cpp | 41 ++++- clang/lib/Driver/SanitizerArgs.cpp | 37 +++++ clang/lib/Driver/ToolChains/Clang.cpp | 3 + clang/lib/Frontend/CompilerInvocation.cpp | 13 ++ clang/lib/Serialization/ASTReaderStmt.cpp | 1 + clang/lib/Serialization/ASTWriterStmt.cpp | 1 + .../CodeGen/overflow-idiom-exclusion-fp.c | 83 ++++++++++ clang/test/CodeGen/overflow-idiom-exclusion.c | 151 ++++++++++++++++++ 17 files changed, 504 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/overflow-idiom-exclusion-fp.c create mode 100644 clang/test/CodeGen/overflow-idiom-exclusion.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7c4451d93394c3..f5696d6ce15dc7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -392,6 +392,36 @@ Moved checkers Sanitizers ---------- +- Added the ``-fsanitize-overflow-pattern-exclusion=`` flag which can be used + to disable specific overflow-dependent code patterns. The supported patterns + are: ``add-overflow-test``, ``negated-unsigned-const``, and + ``post-decr-while``. The sanitizer instrumentation can be toggled off for all + available patterns by specifying ``all``. Conversely, you can disable all + exclusions with ``none``. + + .. 
code-block:: c++ + + /// specified with ``-fsanitize-overflow-pattern-exclusion=add-overflow-test`` + int common_overflow_check_pattern(unsigned base, unsigned offset) { + if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, won't be instrumented + } + + /// specified with ``-fsanitize-overflow-pattern-exclusion=negated-unsigned-const`` + void negation_overflow() { + unsigned long foo = -1UL; // No longer causes a negation overflow warning + unsigned long bar = -2UL; // and so on... + } + + /// specified with ``-fsanitize-overflow-pattern-exclusion=post-decr-while`` + void while_post_decrement() { + unsigned char count = 16; + while (count--) { /* ... */} // No longer causes unsigned-integer-overflow sanitizer to trip + } + + Many existing projects have a large amount of these code patterns present. + This new flag should allow those projects to enable integer sanitizers with + less noise. + Python Binding Changes ---------------------- - Fixed an issue that led to crashes when calling ``Type.get_exception_specification_kind``. diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 531d56e313826c..9f3d980eefbea7 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -293,6 +293,48 @@ To silence reports from unsigned integer overflow, you can set ``-fsanitize-recover=unsigned-integer-overflow``, is particularly useful for providing fuzzing signal without blowing up logs. +Disabling instrumentation for common overflow patterns +------------------------------------------------------ + +There are certain overflow-dependent or overflow-prone code patterns which +produce a lot of noise for integer overflow/truncation sanitizers. Negated +unsigned constants, post-decrements in a while loop condition and simple +overflow checks are accepted and pervasive code patterns. 
However, the signal +received from sanitizers instrumenting these code patterns may be too noisy for +some projects. To disable instrumentation for these common patterns one should +use ``-fsanitize-overflow-pattern-exclusion=``. + +Currently, this option supports three overflow-dependent code idioms: + +``negated-unsigned-const`` + +.. code-block:: c++ + + /// -fsanitize-overflow-pattern-exclusion=negated-unsigned-const + unsigned long foo = -1UL; // No longer causes a negation overflow warning + unsigned long bar = -2UL; // and so on... + +``post-decr-while`` + +.. code-block:: c++ + + /// -fsanitize-overflow-pattern-exclusion=post-decr-while + unsigned char count = 16; + while (count--) { /* ... */ } // No longer causes unsigned-integer-overflow sanitizer to trip + +``add-overflow-test`` + +.. code-block:: c++ + + /// -fsanitize-overflow-pattern-exclusion=add-overflow-test + if (base + offset < base) { /* ... */ } // The pattern of `a + b < a`, and other re-orderings, + // won't be instrumented (same for signed types) + +You can enable all exclusions with +``-fsanitize-overflow-pattern-exclusion=all`` or disable all exclusions with +``-fsanitize-overflow-pattern-exclusion=none``. Specifying ``none`` has +precedence over other values. 
+ Issue Suppression ================= diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 5b813bfc2faf90..f5863524723a2e 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -4043,6 +4043,15 @@ class BinaryOperator : public Expr { void setHasStoredFPFeatures(bool B) { BinaryOperatorBits.HasFPFeatures = B; } bool hasStoredFPFeatures() const { return BinaryOperatorBits.HasFPFeatures; } + /// Set and get the bit that informs arithmetic overflow sanitizers whether + /// or not they should exclude certain BinaryOperators from instrumentation + void setExcludedOverflowPattern(bool B) { + BinaryOperatorBits.ExcludedOverflowPattern = B; + } + bool hasExcludedOverflowPattern() const { + return BinaryOperatorBits.ExcludedOverflowPattern; + } + /// Get FPFeatures from trailing storage FPOptionsOverride getStoredFPFeatures() const { assert(hasStoredFPFeatures()); diff --git a/clang/include/clang/AST/Stmt.h b/clang/include/clang/AST/Stmt.h index bbd7634bcc3bfb..f1a2aac0a8b2f8 100644 --- a/clang/include/clang/AST/Stmt.h +++ b/clang/include/clang/AST/Stmt.h @@ -650,6 +650,11 @@ class alignas(void *) Stmt { LLVM_PREFERRED_TYPE(bool) unsigned HasFPFeatures : 1; + /// Whether or not this BinaryOperator should be excluded from integer + /// overflow sanitization. + LLVM_PREFERRED_TYPE(bool) + unsigned ExcludedOverflowPattern : 1; + SourceLocation OpLoc; }; diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index d454a7ff2f8cf4..2e9f2c552aad8a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -406,6 +406,8 @@ VALUE_LANGOPT(TrivialAutoVarInitMaxSize, 32, 0, "stop trivial automatic variable initialization if var size exceeds the specified size (in bytes). 
Must be greater than 0.") ENUM_LANGOPT(SignedOverflowBehavior, SignedOverflowBehaviorTy, 2, SOB_Undefined, "signed integer overflow handling") +LANGOPT(IgnoreNegationOverflow, 1, 0, "ignore overflow caused by negation") +LANGOPT(SanitizeOverflowIdioms, 1, 1, "enable instrumentation for common overflow idioms") ENUM_LANGOPT(ThreadModel , ThreadModelKind, 2, ThreadModelKind::POSIX, "Thread Model") BENIGN_LANGOPT(ArrowDepth, 32, 256, diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 91f1c2f2e6239e..eb4cb4b5a7e93f 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -367,6 +367,21 @@ class LangOptionsBase { PerThread, }; + /// Exclude certain code patterns from being instrumented by arithmetic + /// overflow sanitizers + enum OverflowPatternExclusionKind { + /// Don't exclude any overflow patterns from sanitizers + None = 1 << 0, + /// Exclude all overflow patterns (below) + All = 1 << 1, + /// if (a + b < a) + AddOverflowTest = 1 << 2, + /// -1UL + NegUnsignedConst = 1 << 3, + /// while (count--) + PostDecrInWhile = 1 << 4, + }; + enum class DefaultVisiblityExportMapping { None, /// map only explicit default visibilities to exported @@ -555,6 +570,11 @@ class LangOptions : public LangOptionsBase { /// The default stream kind used for HIP kernel launching. GPUDefaultStreamKind GPUDefaultStream; + /// Which overflow patterns should be excluded from sanitizer instrumentation + unsigned OverflowPatternExclusionMask = 0; + + std::vector OverflowPatternExclusionValues; + /// The seed used by the randomize structure layout feature. 
std::string RandstructSeed; @@ -630,6 +650,14 @@ class LangOptions : public LangOptionsBase { return MSCompatibilityVersion >= MajorVersion * 100000U; } + bool isOverflowPatternExcluded(OverflowPatternExclusionKind Kind) const { + if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::None) + return false; + if (OverflowPatternExclusionMask & OverflowPatternExclusionKind::All) + return true; + return OverflowPatternExclusionMask & Kind; + } + /// Reset all of the options that are not considered when building a /// module. void resetNonModularOptions(); diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 6df3a6a5943a97..acc1f2fde53979 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2565,6 +2565,11 @@ defm sanitize_stats : BoolOption<"f", "sanitize-stats", "Disable">, BothFlags<[], [ClangOption], " sanitizer statistics gathering.">>, Group; +def fsanitize_overflow_pattern_exclusion_EQ : CommaJoined<["-"], "fsanitize-overflow-pattern-exclusion=">, + HelpText<"Specify the overflow patterns to exclude from artihmetic sanitizer instrumentation">, + Visibility<[ClangOption, CC1Option]>, + Values<"none,all,add-overflow-test,negated-unsigned-const,post-decr-while">, + MarshallingInfoStringVector>; def fsanitize_thread_memory_access : Flag<["-"], "fsanitize-thread-memory-access">, Group, HelpText<"Enable memory access instrumentation in ThreadSanitizer (default)">; diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index 47ef175302679f..e64ec463ca8907 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -33,6 +33,7 @@ class SanitizerArgs { std::vector BinaryMetadataIgnorelistFiles; int CoverageFeatures = 0; int BinaryMetadataFeatures = 0; + int OverflowPatternExclusions = 0; int MsanTrackOrigins = 0; bool MsanUseAfterDtor = true; bool MsanParamRetval = true; diff 
--git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 9d5b8167d0ee62..57475c66a94e35 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4759,6 +4759,53 @@ ParenListExpr *ParenListExpr::CreateEmpty(const ASTContext &Ctx, return new (Mem) ParenListExpr(EmptyShell(), NumExprs); } +/// Certain overflow-dependent code patterns can have their integer overflow +/// sanitization disabled. Check for the common pattern `if (a + b < a)` and +/// return the resulting BinaryOperator responsible for the addition so we can +/// elide overflow checks during codegen. +static std::optional +getOverflowPatternBinOp(const BinaryOperator *E) { + Expr *Addition, *ComparedTo; + if (E->getOpcode() == BO_LT) { + Addition = E->getLHS(); + ComparedTo = E->getRHS(); + } else if (E->getOpcode() == BO_GT) { + Addition = E->getRHS(); + ComparedTo = E->getLHS(); + } else { + return {}; + } + + const Expr *AddLHS = nullptr, *AddRHS = nullptr; + BinaryOperator *BO = dyn_cast(Addition); + + if (BO && BO->getOpcode() == clang::BO_Add) { + // now store addends for lookup on other side of '>' + AddLHS = BO->getLHS(); + AddRHS = BO->getRHS(); + } + + if (!AddLHS || !AddRHS) + return {}; + + const Decl *LHSDecl, *RHSDecl, *OtherDecl; + + LHSDecl = AddLHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); + RHSDecl = AddRHS->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); + OtherDecl = ComparedTo->IgnoreParenImpCasts()->getReferencedDeclOfCallee(); + + if (!OtherDecl) + return {}; + + if (!LHSDecl && !RHSDecl) + return {}; + + if ((LHSDecl && LHSDecl == OtherDecl && LHSDecl != RHSDecl) || + (RHSDecl && RHSDecl == OtherDecl && RHSDecl != LHSDecl)) + return BO; + return {}; +} + BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, @@ -4768,8 +4815,15 @@ BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, assert(!isCompoundAssignmentOp() && "Use 
CompoundAssignOperator for compound assignments"); BinaryOperatorBits.OpLoc = opLoc; + BinaryOperatorBits.ExcludedOverflowPattern = 0; SubExprs[LHS] = lhs; SubExprs[RHS] = rhs; + if (Ctx.getLangOpts().isOverflowPatternExcluded( + LangOptions::OverflowPatternExclusionKind::AddOverflowTest)) { + std::optional Result = getOverflowPatternBinOp(this); + if (Result.has_value()) + Result.value()->BinaryOperatorBits.ExcludedOverflowPattern = 1; + } BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(); if (hasStoredFPFeatures()) setStoredFPFeatures(FPFeatures); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 84392745ea6144..6eac2b4c54e1ba 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -24,6 +24,7 @@ #include "clang/AST/Attr.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/Expr.h" +#include "clang/AST/ParentMapContext.h" #include "clang/AST/RecordLayout.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/CodeGenOptions.h" @@ -195,13 +196,24 @@ static bool CanElideOverflowCheck(const ASTContext &Ctx, const BinOpInfo &Op) { if (!Op.mayHaveIntegerOverflow()) return true; + const UnaryOperator *UO = dyn_cast(Op.E); + + if (UO && UO->getOpcode() == UO_Minus && + Ctx.getLangOpts().isOverflowPatternExcluded( + LangOptions::OverflowPatternExclusionKind::NegUnsignedConst) && + UO->isIntegerConstantExpr(Ctx)) + return true; + // If a unary op has a widened operand, the op cannot overflow. - if (const auto *UO = dyn_cast(Op.E)) + if (UO) return !UO->canOverflow(); // We usually don't need overflow checks for binops with widened operands. // Multiplication with promoted unsigned operands is a special case. 
const auto *BO = cast(Op.E); + if (BO->hasExcludedOverflowPattern()) + return true; + auto OptionalLHSTy = getUnwidenedIntegerType(Ctx, BO->getLHS()); if (!OptionalLHSTy) return false; @@ -2766,6 +2778,26 @@ llvm::Value *ScalarExprEmitter::EmitIncDecConsiderOverflowBehavior( llvm_unreachable("Unknown SignedOverflowBehaviorTy"); } +/// For the purposes of overflow pattern exclusion, does this match the +/// "while(i--)" pattern? +static bool matchesPostDecrInWhile(const UnaryOperator *UO, bool isInc, + bool isPre, ASTContext &Ctx) { + if (isInc || isPre) + return false; + + // -fsanitize-overflow-pattern-exclusion=post-decr-while + if (!Ctx.getLangOpts().isOverflowPatternExcluded( + LangOptions::OverflowPatternExclusionKind::PostDecrInWhile)) + return false; + + // all Parents (usually just one) must be a WhileStmt + for (const auto &Parent : Ctx.getParentMapContext().getParents(*UO)) + if (!Parent.get()) + return false; + + return true; +} + namespace { /// Handles check and update for lastprivate conditional variables. 
class OMPLastprivateConditionalUpdateRAII { @@ -2877,6 +2909,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (type->isIntegerType()) { QualType promotedType; bool canPerformLossyDemotionCheck = false; + + bool excludeOverflowPattern = + matchesPostDecrInWhile(E, isInc, isPre, CGF.getContext()); + if (CGF.getContext().isPromotableIntegerType(type)) { promotedType = CGF.getContext().getPromotedIntegerType(type); assert(promotedType != type && "Shouldn't promote to the same type."); @@ -2936,7 +2972,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } else if (E->canOverflow() && type->isSignedIntegerOrEnumerationType()) { value = EmitIncDecConsiderOverflowBehavior(E, value, isInc); } else if (E->canOverflow() && type->isUnsignedIntegerType() && - CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow)) { + CGF.SanOpts.has(SanitizerKind::UnsignedIntegerOverflow) && + !excludeOverflowPattern) { value = EmitOverflowCheckedBinOp(createBinOpInfoFromIncDec( E, value, isInc, E->getFPFeaturesInEffect(CGF.getLangOpts()))); } else { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 1fd870b72286e5..a63ee944fd1bb4 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -119,6 +119,10 @@ static SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A, static int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A, bool DiagnoseErrors); +static int parseOverflowPatternExclusionValues(const Driver &D, + const llvm::opt::Arg *A, + bool DiagnoseErrors); + /// Parse -f(no-)?sanitize-metadata= flag values, diagnosing any invalid /// components. Returns OR of members of \c BinaryMetadataFeature enumeration. 
static int parseBinaryMetadataFeatures(const Driver &D, const llvm::opt::Arg *A, @@ -788,6 +792,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, << "fsanitize-trap=cfi"; } + for (const auto *Arg : + Args.filtered(options::OPT_fsanitize_overflow_pattern_exclusion_EQ)) { + Arg->claim(); + OverflowPatternExclusions |= + parseOverflowPatternExclusionValues(D, Arg, DiagnoseErrors); + } + // Parse -f(no-)?sanitize-coverage flags if coverage is supported by the // enabled sanitizers. for (const auto *Arg : Args) { @@ -1241,6 +1252,10 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, addSpecialCaseListOpt(Args, CmdArgs, "-fsanitize-system-ignorelist=", SystemIgnorelistFiles); + if (OverflowPatternExclusions) + Args.AddAllArgs(CmdArgs, + options::OPT_fsanitize_overflow_pattern_exclusion_EQ); + if (MsanTrackOrigins) CmdArgs.push_back(Args.MakeArgString("-fsanitize-memory-track-origins=" + Twine(MsanTrackOrigins))); @@ -1426,6 +1441,28 @@ SanitizerMask parseArgValues(const Driver &D, const llvm::opt::Arg *A, return Kinds; } +static int parseOverflowPatternExclusionValues(const Driver &D, + const llvm::opt::Arg *A, + bool DiagnoseErrors) { + int Exclusions = 0; + for (int i = 0, n = A->getNumValues(); i != n; ++i) { + const char *Value = A->getValue(i); + int E = + llvm::StringSwitch(Value) + .Case("none", LangOptionsBase::None) + .Case("all", LangOptionsBase::All) + .Case("add-overflow-test", LangOptionsBase::AddOverflowTest) + .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst) + .Case("post-decr-while", LangOptionsBase::PostDecrInWhile) + .Default(0); + if (E == 0) + D.Diag(clang::diag::err_drv_unsupported_option_argument) + << A->getSpelling() << Value; + Exclusions |= E; + } + return Exclusions; +} + int parseCoverageFeatures(const Driver &D, const llvm::opt::Arg *A, bool DiagnoseErrors) { assert(A->getOption().matches(options::OPT_fsanitize_coverage) || diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp index 96aa930ea28612..f2bc11839edd4d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7769,6 +7769,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fgpu_default_stream_EQ); } + Args.AddAllArgs(CmdArgs, + options::OPT_fsanitize_overflow_pattern_exclusion_EQ); + Args.AddLastArg(CmdArgs, options::OPT_foffload_uniform_block, options::OPT_fno_offload_uniform_block); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index e3911c281985b7..5a5f5cb79a12f2 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4267,6 +4267,19 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } + if (auto *A = Args.getLastArg(OPT_fsanitize_overflow_pattern_exclusion_EQ)) { + for (int i = 0, n = A->getNumValues(); i != n; ++i) { + Opts.OverflowPatternExclusionMask |= + llvm::StringSwitch(A->getValue(i)) + .Case("none", LangOptionsBase::None) + .Case("all", LangOptionsBase::All) + .Case("add-overflow-test", LangOptionsBase::AddOverflowTest) + .Case("negated-unsigned-const", LangOptionsBase::NegUnsignedConst) + .Case("post-decr-while", LangOptionsBase::PostDecrInWhile) + .Default(0); + } + } + // Parse -fsanitize= arguments. 
parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index a33f2a41a65497..8ae07907a04aba 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -1128,6 +1128,7 @@ void ASTStmtReader::VisitBinaryOperator(BinaryOperator *E) { (BinaryOperator::Opcode)CurrentUnpackingBits->getNextBits(/*Width=*/6)); bool hasFP_Features = CurrentUnpackingBits->getNextBit(); E->setHasStoredFPFeatures(hasFP_Features); + E->setExcludedOverflowPattern(CurrentUnpackingBits->getNextBit()); E->setLHS(Record.readSubExpr()); E->setRHS(Record.readSubExpr()); E->setOperatorLoc(readSourceLocation()); diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 038616a675b727..c292d0a789c7cd 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -1063,6 +1063,7 @@ void ASTStmtWriter::VisitBinaryOperator(BinaryOperator *E) { CurrentPackingBits.addBits(E->getOpcode(), /*Width=*/6); bool HasFPFeatures = E->hasStoredFPFeatures(); CurrentPackingBits.addBit(HasFPFeatures); + CurrentPackingBits.addBit(E->hasExcludedOverflowPattern()); Record.AddStmt(E->getLHS()); Record.AddStmt(E->getRHS()); Record.AddSourceLocation(E->getOperatorLoc()); diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c new file mode 100644 index 00000000000000..d21405c56beab3 --- /dev/null +++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c @@ -0,0 +1,83 @@ +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s + +// 
Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns +extern unsigned a, b, c; +extern int u, v, w; + +extern unsigned some(void); + +// Make sure all these still have handler paths, we shouldn't be excluding +// instrumentation of any "near" patterns. +// CHECK-LABEL: close_but_not_quite +void close_but_not_quite(void) { + // CHECK: br i1{{.*}}handler. + if (a + b > a) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (a - b < a) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (a + b < a) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (a + b + 1 < a) + c = 9; + + // CHECK: br i1{{.*}}handler. + // CHECK: br i1{{.*}}handler. + if (a + b < a + 1) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (b >= a + b) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (a + a < a) + c = 9; + + // CHECK: br i1{{.*}}handler. + if (a + b == a) + c = 9; + + // CHECK: br i1{{.*}}handler + // Although this can never actually overflow we are still checking that the + // sanitizer instruments it. + while (--a) + some(); +} + +// cvise'd kernel code that caused problems during development +typedef unsigned _size_t; +typedef enum { FSE_repeat_none } FSE_repeat; +typedef enum { ZSTD_defaultAllowed } ZSTD_defaultPolicy_e; +FSE_repeat ZSTD_selectEncodingType_repeatMode; +ZSTD_defaultPolicy_e ZSTD_selectEncodingType_isDefaultAllowed; +_size_t ZSTD_NCountCost(void); + +// CHECK-LABEL: ZSTD_selectEncodingType +// CHECK: br i1{{.*}}handler +void ZSTD_selectEncodingType(void) { + _size_t basicCost = + ZSTD_selectEncodingType_isDefaultAllowed ? 
ZSTD_NCountCost() : 0, + compressedCost = 3 + ZSTD_NCountCost(); + if (basicCost <= compressedCost) + ZSTD_selectEncodingType_repeatMode = FSE_repeat_none; +} + +// CHECK-LABEL: function_calls +void function_calls(void) { + // CHECK: br i1{{.*}}handler + if (some() + b < some()) + c = 9; +} + +// CHECK-LABEL: not_quite_a_negated_unsigned_const +void not_quite_a_negated_unsigned_const(void) { + // CHECK: br i1{{.*}}handler + a = -b; +} diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c new file mode 100644 index 00000000000000..7c8c4af61029de --- /dev/null +++ b/clang/test/CodeGen/overflow-idiom-exclusion.c @@ -0,0 +1,151 @@ +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - | FileCheck %s --check-prefix=ADD +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - | FileCheck %s --check-prefix=NEGATE +// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - | FileCheck %s --check-prefix=WHILE + +// Ensure some common overflow-dependent or overflow-prone code patterns don't +// trigger the overflow sanitizers. In many cases, overflow warnings caused by +// these patterns are seen as "noise" and result in users turning off +// sanitization all together. + +// A pattern like "if (a + b < a)" simply checks for overflow and usually means +// the user is trying to handle it gracefully. 
+ +// Similarly, a pattern resembling "while (i--)" is extremely common and +// warning on its inevitable overflow can be seen as superfluous. Do note that +// using "i" in future calculations can be tricky because it will still +// wrap-around. + +// Another common pattern that, in some cases, is found to be too noisy is +// unsigned negation, for example: +// unsigned long A = -1UL; + + +// CHECK-NOT: handle{{.*}}overflow + +// ADD: usub.with.overflow +// ADD: negate_overflow +// ADD-NOT: handler.add_overflow + +// NEGATE: handler.add_overflow +// NEGATE: usub.with.overflow +// NEGATE-NOT: negate_overflow + +// WHILE: handler.add_overflow +// WHILE: negate_overflow +// WHILE-NOT: usub.with.overflow +extern unsigned a, b, c; +extern unsigned some(void); + +void basic_commutativity(void) { + if (a + b < a) + c = 9; + if (a + b < b) + c = 9; + if (b + a < b) + c = 9; + if (b + a < a) + c = 9; + if (a > a + b) + c = 9; + if (a > b + a) + c = 9; + if (b > a + b) + c = 9; + if (b > b + a) + c = 9; +} + +void arguments_and_commutativity(unsigned V1, unsigned V2) { + if (V1 + V2 < V1) + c = 9; + if (V1 + V2 < V2) + c = 9; + if (V2 + V1 < V2) + c = 9; + if (V2 + V1 < V1) + c = 9; + if (V1 > V1 + V2) + c = 9; + if (V1 > V2 + V1) + c = 9; + if (V2 > V1 + V2) + c = 9; + if (V2 > V2 + V1) + c = 9; +} + +void pointers(unsigned *P1, unsigned *P2, unsigned V1) { + if (*P1 + *P2 < *P1) + c = 9; + if (*P1 + V1 < V1) + c = 9; + if (V1 + *P2 < *P2) + c = 9; +} + +struct OtherStruct { + unsigned foo, bar; +}; + +struct MyStruct { + unsigned base, offset; + struct OtherStruct os; +}; + +extern struct MyStruct ms; + +void structs(void) { + if (ms.base + ms.offset < ms.base) + c = 9; +} + +void nestedstructs(void) { + if (ms.os.foo + ms.os.bar < ms.os.foo) + c = 9; +} + +// Normally, this would be folded into a simple call to the overflow handler +// and a store. Excluding this pattern results in just a store. 
+void constants(void) { + unsigned base = 4294967295; + unsigned offset = 1; + if (base + offset < base) + c = 9; +} + +void common_while(unsigned i) { + // This post-decrement usually causes overflow sanitizers to trip on the very + // last operation. + while (i--) { + some(); + } +} + +// Normally, these assignments would trip the unsigned overflow sanitizer. +void negation(void) { +#define SOME -1UL + unsigned long A = -1UL; + unsigned long B = -2UL; + unsigned long C = -3UL; + unsigned long D = -SOME; + (void)A;(void)B;(void)C;(void)D; +} + +// cvise'd kernel code that caused problems during development due to sign +// extension +typedef unsigned long _size_t; +int qnbytes; +int *key_alloc_key; +_size_t key_alloc_quotalen; +int *key_alloc(void) { + if (qnbytes + key_alloc_quotalen < qnbytes) + return key_alloc_key; + return key_alloc_key + 3;; +} + +void function_call(void) { + if (b + some() < b) + c = 9; +} From 9a9ca9850f3c6b278e052745f51a87296d9fedd2 Mon Sep 17 00:00:00 2001 From: Krzysztof Drewniak Date: Wed, 14 Aug 2024 17:20:25 -0700 Subject: [PATCH 12/47] [mlir][MemRef] Add more ops to narrow type support, strided metadata expansion (#102228) - Add support for memory_space_cast to strided metadata expansion and narrow type emulation - Add support for expand_shape to narrow type emulation (like collapse_shape, it's a noop after linearization) and to expand-strided-metadata (mirroring the collapse_shape pattern) - Add support for memref.dealloc to narrow type emulation (it is a trivial rewrite) and for memref.copy (which is unsupported when it is used for a layout change but a trivial rewrite otherwise) --- .../MemRef/Transforms/EmulateNarrowType.cpp | 93 ++++++++++++++++++- .../Transforms/ExpandStridedMetadata.cpp | 87 +++++++++++++++++ .../Dialect/MemRef/emulate-narrow-type.mlir | 68 ++++++++++++++ .../MemRef/expand-strided-metadata.mlir | 38 ++++++++ 4 files changed, 283 insertions(+), 3 deletions(-) diff --git
a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp index 88d56a8fbec749..a45b79194a7580 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp @@ -234,6 +234,46 @@ struct ConvertMemRefAssumeAlignment final } }; +//===----------------------------------------------------------------------===// +// ConvertMemRefCopy +//===----------------------------------------------------------------------===// + +struct ConvertMemRefCopy final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(memref::CopyOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + auto maybeRankedSource = dyn_cast(op.getSource().getType()); + auto maybeRankedDest = dyn_cast(op.getTarget().getType()); + if (maybeRankedSource && maybeRankedDest && + maybeRankedSource.getLayout() != maybeRankedDest.getLayout()) + return rewriter.notifyMatchFailure( + op, llvm::formatv("memref.copy emulation with distinct layouts ({0} " + "and {1}) is currently unimplemented", + maybeRankedSource.getLayout(), + maybeRankedDest.getLayout())); + rewriter.replaceOpWithNewOp(op, adaptor.getSource(), + adaptor.getTarget()); + return success(); + } +}; + +//===----------------------------------------------------------------------===// +// ConvertMemRefDealloc +//===----------------------------------------------------------------------===// + +struct ConvertMemRefDealloc final : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(memref::DeallocOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp(op, adaptor.getMemref()); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertMemRefLoad 
//===----------------------------------------------------------------------===// @@ -300,6 +340,30 @@ struct ConvertMemRefLoad final : OpConversionPattern { } }; +//===----------------------------------------------------------------------===// +// ConvertMemRefMemorySpaceCast +//===----------------------------------------------------------------------===// + +struct ConvertMemRefMemorySpaceCast final + : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(memref::MemorySpaceCastOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Type newTy = getTypeConverter()->convertType(op.getDest().getType()); + if (!newTy) { + return rewriter.notifyMatchFailure( + op->getLoc(), llvm::formatv("failed to convert memref type: {0}", + op.getDest().getType())); + } + + rewriter.replaceOpWithNewOp(op, newTy, + adaptor.getSource()); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertMemRefReinterpretCast //===----------------------------------------------------------------------===// @@ -490,6 +554,28 @@ struct ConvertMemRefCollapseShape final } }; +/// Emulating a `memref.expand_shape` becomes a no-op after emulation given +/// that we flatten memrefs to a single dimension as part of the emulation and +/// the expansion would just have been undone. 
+struct ConvertMemRefExpandShape final + : OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(memref::ExpandShapeOp expandShapeOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Value srcVal = adaptor.getSrc(); + auto newTy = dyn_cast(srcVal.getType()); + if (!newTy) + return failure(); + + if (newTy.getRank() != 1) + return failure(); + + rewriter.replaceOp(expandShapeOp, srcVal); + return success(); + } +}; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -502,9 +588,10 @@ void memref::populateMemRefNarrowTypeEmulationPatterns( // Populate `memref.*` conversion patterns. patterns.add, - ConvertMemRefAllocation, - ConvertMemRefCollapseShape, ConvertMemRefLoad, - ConvertMemrefStore, ConvertMemRefAssumeAlignment, + ConvertMemRefAllocation, ConvertMemRefCopy, + ConvertMemRefDealloc, ConvertMemRefCollapseShape, + ConvertMemRefExpandShape, ConvertMemRefLoad, ConvertMemrefStore, + ConvertMemRefAssumeAlignment, ConvertMemRefMemorySpaceCast, ConvertMemRefSubview, ConvertMemRefReinterpretCast>( typeConverter, patterns.getContext()); memref::populateResolveExtractStridedMetadataPatterns(patterns); diff --git a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp index 585c5b73814219..a2049ba4a4924d 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandStridedMetadata.cpp @@ -726,6 +726,41 @@ struct ExtractStridedMetadataOpCollapseShapeFolder } }; +/// Pattern to replace `extract_strided_metadata(expand_shape)` +/// with the results of computing the sizes and strides on the expanded shape +/// and dividing up dimensions into static and dynamic parts as needed. 
+struct ExtractStridedMetadataOpExpandShapeFolder + : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(memref::ExtractStridedMetadataOp op, + PatternRewriter &rewriter) const override { + auto expandShapeOp = op.getSource().getDefiningOp(); + if (!expandShapeOp) + return failure(); + + FailureOr stridedMetadata = + resolveReshapeStridedMetadata( + rewriter, expandShapeOp, getExpandedSizes, getExpandedStrides); + if (failed(stridedMetadata)) { + return rewriter.notifyMatchFailure( + op, "failed to resolve metadata in terms of source expand_shape op"); + } + + Location loc = expandShapeOp.getLoc(); + SmallVector results; + results.push_back(stridedMetadata->basePtr); + results.push_back(getValueOrCreateConstantIndexOp(rewriter, loc, + stridedMetadata->offset)); + results.append( + getValueOrCreateConstantIndexOp(rewriter, loc, stridedMetadata->sizes)); + results.append(getValueOrCreateConstantIndexOp(rewriter, loc, + stridedMetadata->strides)); + rewriter.replaceOp(op, results); + return success(); + } +}; + /// Replace `base, offset, sizes, strides = /// extract_strided_metadata(allocLikeOp)` /// @@ -1060,6 +1095,54 @@ class ExtractStridedMetadataOpCastFolder } }; +/// Replace `base, offset, sizes, strides = extract_strided_metadata( +/// memory_space_cast(src) to dstTy)` +/// with +/// ``` +/// oldBase, offset, sizes, strides = extract_strided_metadata(src) +/// destBaseTy = type(oldBase) with memory space from destTy +/// base = memory_space_cast(oldBase) to destBaseTy +/// ``` +/// +/// In other words, propagate metadata extraction across memory space casts.
+class ExtractStridedMetadataOpMemorySpaceCastFolder + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult + matchAndRewrite(memref::ExtractStridedMetadataOp extractStridedMetadataOp, + PatternRewriter &rewriter) const override { + Location loc = extractStridedMetadataOp.getLoc(); + Value source = extractStridedMetadataOp.getSource(); + auto memSpaceCastOp = source.getDefiningOp(); + if (!memSpaceCastOp) + return failure(); + auto newExtractStridedMetadata = + rewriter.create( + loc, memSpaceCastOp.getSource()); + SmallVector results(newExtractStridedMetadata.getResults()); + // As with most other strided metadata rewrite patterns, don't introduce + // a use of the base pointer where none existed. This needs to happen here, + // as opposed to in later dead-code elimination, because these patterns are + // sometimes used during dialect conversion (see EmulateNarrowType, for + // example), so adding spurious usages would cause a pre-legalization value + // to be live that would be dead had this pattern not run.
+ if (!extractStridedMetadataOp.getBaseBuffer().use_empty()) { + auto baseBuffer = results[0]; + auto baseBufferType = cast(baseBuffer.getType()); + MemRefType::Builder newTypeBuilder(baseBufferType); + newTypeBuilder.setMemorySpace( + memSpaceCastOp.getResult().getType().getMemorySpace()); + results[0] = rewriter.create( + loc, Type{newTypeBuilder}, baseBuffer); + } else { + results[0] = nullptr; + } + rewriter.replaceOp(extractStridedMetadataOp, results); + return success(); + } +}; + /// Replace `base, offset = /// extract_strided_metadata(extract_strided_metadata(src)#0)` /// With @@ -1099,11 +1182,13 @@ void memref::populateExpandStridedMetadataPatterns( ExtractStridedMetadataOpAllocFolder, ExtractStridedMetadataOpAllocFolder, ExtractStridedMetadataOpCollapseShapeFolder, + ExtractStridedMetadataOpExpandShapeFolder, ExtractStridedMetadataOpGetGlobalFolder, RewriteExtractAlignedPointerAsIndexOfViewLikeOp, ExtractStridedMetadataOpReinterpretCastFolder, ExtractStridedMetadataOpSubviewFolder, ExtractStridedMetadataOpCastFolder, + ExtractStridedMetadataOpMemorySpaceCastFolder, ExtractStridedMetadataOpExtractStridedMetadataFolder>( patterns.getContext()); } @@ -1113,11 +1198,13 @@ void memref::populateResolveExtractStridedMetadataPatterns( patterns.add, ExtractStridedMetadataOpAllocFolder, ExtractStridedMetadataOpCollapseShapeFolder, + ExtractStridedMetadataOpExpandShapeFolder, ExtractStridedMetadataOpGetGlobalFolder, ExtractStridedMetadataOpSubviewFolder, RewriteExtractAlignedPointerAsIndexOfViewLikeOp, ExtractStridedMetadataOpReinterpretCastFolder, ExtractStridedMetadataOpCastFolder, + ExtractStridedMetadataOpMemorySpaceCastFolder, ExtractStridedMetadataOpExtractStridedMetadataFolder>( patterns.getContext()); } diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir index a67237b5e4dd19..540da239fced08 100644 --- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir +++ 
b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir @@ -6,11 +6,13 @@ func.func @memref_i8() -> i8 { %c3 = arith.constant 3 : index %m = memref.alloc() : memref<4xi8, 1> %v = memref.load %m[%c3] : memref<4xi8, 1> + memref.dealloc %m : memref<4xi8, 1> return %v : i8 } // CHECK-LABEL: func @memref_i8() // CHECK: %[[M:.+]] = memref.alloc() : memref<4xi8, 1> // CHECK-NEXT: %[[V:.+]] = memref.load %[[M]][%{{.+}}] : memref<4xi8, 1> +// CHECK-NEXT: memref.dealloc %[[M]] // CHECK-NEXT: return %[[V]] // CHECK32-LABEL: func @memref_i8() @@ -21,6 +23,7 @@ func.func @memref_i8() -> i8 { // CHECK32: %[[CAST:.+]] = arith.index_cast %[[C24]] : index to i32 // CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[V]], %[[CAST]] // CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i8 +// CHECK32-NEXT: memref.dealloc %[[M]] // CHECK32-NEXT: return %[[TRUNC]] // ----- @@ -485,3 +488,68 @@ func.func @memref_collapse_shape_i4(%idx0 : index, %idx1 : index) -> i4 { // CHECK32-NOT: memref.collapse_shape // CHECK32: memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32> +// ----- + +func.func @memref_expand_shape_i4(%idx0 : index, %idx1 : index, %idx2 : index) -> i4 { + %arr = memref.alloc() : memref<256x128xi4> + %expand = memref.expand_shape %arr[[0, 1], [2]] output_shape [32, 8, 128] : memref<256x128xi4> into memref<32x8x128xi4> + %1 = memref.load %expand[%idx0, %idx1, %idx2] : memref<32x8x128xi4> + return %1 : i4 +} + +// CHECK-LABEL: func.func @memref_expand_shape_i4( +// CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<16384xi8> +// CHECK-NOT: memref.expand_shape +// CHECK: memref.load %[[ALLOC]][%{{.*}}] : memref<16384xi8> + +// CHECK32-LABEL: func.func @memref_expand_shape_i4( +// CHECK32: %[[ALLOC:.*]] = memref.alloc() : memref<4096xi32> +// CHECK32-NOT: memref.expand_shape +// CHECK32: memref.load %[[ALLOC]][%{{.*}}] : memref<4096xi32> + +// ----- + +func.func @memref_memory_space_cast_i4(%arg0: memref<32x128xi4, 1>) -> memref<32x128xi4> { + %cast = memref.memory_space_cast %arg0 : 
memref<32x128xi4, 1> to memref<32x128xi4> + return %cast : memref<32x128xi4> +} + +// CHECK-LABEL: func.func @memref_memory_space_cast_i4( +// CHECK-SAME: %[[ARG0:.*]]: memref<2048xi8, 1> +// CHECK: %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<2048xi8, 1> to memref<2048xi8> +// CHECK: return %[[CAST]] + +// CHECK32-LABEL: func.func @memref_memory_space_cast_i4( +// CHECK32-SAME: %[[ARG0:.*]]: memref<512xi32, 1> +// CHECK32: %[[CAST:.*]] = memref.memory_space_cast %[[ARG0]] : memref<512xi32, 1> to memref<512xi32> +// CHECK32: return %[[CAST]] + +// ----- + +func.func @memref_copy_i4(%arg0: memref<32x128xi4, 1>, %arg1: memref<32x128xi4>) { + memref.copy %arg0, %arg1 : memref<32x128xi4, 1> to memref<32x128xi4> + return +} + +// CHECK-LABEL: func.func @memref_copy_i4( +// CHECK-SAME: %[[ARG0:.*]]: memref<2048xi8, 1>, %[[ARG1:.*]]: memref<2048xi8> +// CHECK: memref.copy %[[ARG0]], %[[ARG1]] +// CHECK: return + +// CHECK32-LABEL: func.func @memref_copy_i4( +// CHECK32-SAME: %[[ARG0:.*]]: memref<512xi32, 1>, %[[ARG1:.*]]: memref<512xi32> +// CHECK32: memref.copy %[[ARG0]], %[[ARG1]] +// CHECK32: return + +// ----- + +!colMajor = memref<8x8xi4, strided<[1, 8]>> +func.func @copy_distinct_layouts(%idx : index) -> i4 { + %c0 = arith.constant 0 : index + %arr = memref.alloc() : memref<8x8xi4> + %arr2 = memref.alloc() : !colMajor + // expected-error @+1 {{failed to legalize operation 'memref.copy' that was explicitly marked illegal}} + memref.copy %arr, %arr2 : memref<8x8xi4> to !colMajor + %ld = memref.load %arr2[%c0, %c0] : !colMajor + return %ld : i4 +} diff --git a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir index d884ade3195329..8aac802ba10ae9 100644 --- a/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir +++ b/mlir/test/Dialect/MemRef/expand-strided-metadata.mlir @@ -1553,3 +1553,41 @@ func.func @extract_strided_metadata_of_collapse_shape(%base: memref<5x4xf32>) // CHECK-DAG: %[[STEP:.*]] 
= arith.constant 1 : index // CHECK: %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata // CHECK: return %[[BASE]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref, index, index, index + +// ----- + +func.func @extract_strided_metadata_of_memory_space_cast(%base: memref<20xf32>) + -> (memref, index, index, index) { + + %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1> + + %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast : + memref<20xf32, 1> -> memref, index, index, index + + return %base_buffer, %offset, %size, %stride : + memref, index, index, index +} + +// CHECK-LABEL: func @extract_strided_metadata_of_memory_space_cast +// CHECK-DAG: %[[OFFSET:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[SIZE:.*]] = arith.constant 20 : index +// CHECK-DAG: %[[STEP:.*]] = arith.constant 1 : index +// CHECK: %[[BASE:.*]], %{{.*}}, %{{.*}}, %{{.*}} = memref.extract_strided_metadata +// CHECK: %[[CAST:.*]] = memref.memory_space_cast %[[BASE]] +// CHECK: return %[[CAST]], %[[OFFSET]], %[[SIZE]], %[[STEP]] : memref, index, index, index + +// ----- + +func.func @extract_strided_metadata_of_memory_space_cast_no_base(%base: memref<20xf32>) + -> (index, index, index) { + + %memory_space_cast = memref.memory_space_cast %base : memref<20xf32> to memref<20xf32, 1> + + %base_buffer, %offset, %size, %stride = memref.extract_strided_metadata %memory_space_cast : + memref<20xf32, 1> -> memref, index, index, index + + return %offset, %size, %stride : index, index, index +} + +// CHECK-LABEL: func @extract_strided_metadata_of_memory_space_cast_no_base +// CHECK-NOT: memref.memory_space_cast From 564efe26745c5bb7236b095d5b42881cdc64a284 Mon Sep 17 00:00:00 2001 From: pcc Date: Wed, 14 Aug 2024 17:25:28 -0700 Subject: [PATCH 13/47] utils/git: Add linkify script. This script linkifies (i.e. makes clickable in the terminal) text that appears to be a pull request or issue reference (e.g. 
#12345 or PR12345) or a 40-character commit hash (e.g. abc123). You can configure git to automatically send the output of commands that pipe their output through a pager, such as `git log` and `git show`, through this script by running this command from within your LLVM checkout: git config core.pager 'llvm/utils/git/linkify | pager' The pager command is run from the root of the repository even if the git command is run from a subdirectory, so the relative path should always work. It requires OSC 8 support in the terminal. For a list of compatible terminals, see https://github.com/Alhadis/OSC8-Adoption Reviewers: MaskRay Reviewed By: MaskRay Pull Request: https://github.com/llvm/llvm-project/pull/103496 --- llvm/utils/git/linkify | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100755 llvm/utils/git/linkify diff --git a/llvm/utils/git/linkify b/llvm/utils/git/linkify new file mode 100755 index 00000000000000..9fcadd758492cf --- /dev/null +++ b/llvm/utils/git/linkify @@ -0,0 +1,20 @@ +#!/bin/sh + +# This script linkifies (i.e. makes clickable in the terminal) text that appears +# to be a pull request or issue reference (e.g. #12345 or PR12345) or a +# 40-character commit hash (e.g. abc123). You can configure git to automatically +# send the output of commands that pipe their output through a pager, such as +# `git log` and `git show`, through this script by running this command from +# within your LLVM checkout: +# +# git config core.pager 'llvm/utils/git/linkify | pager' +# +# The pager command is run from the root of the repository even if the git +# command is run from a subdirectory, so the relative path should always work. +# +# It requires OSC 8 support in the terminal. 
For a list of compatible terminals, +# see https://github.com/Alhadis/OSC8-Adoption + +sed \ + -e 's,\(#\|\bPR\)\([0-9]\+\),\x1b]8;;https://github.com/llvm/llvm-project/issues/\2\x1b\\\0\x1b]8;;\x1b\\,gi' \ + -e 's,[0-9a-f]\{40\},\x1b]8;;https://github.com/llvm/llvm-project/commit/\0\x1b\\\0\x1b]8;;\x1b\\,g' From 7275919cd5fc89c42a52168c9f4411b4e5421c95 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 14 Aug 2024 17:46:30 -0700 Subject: [PATCH 14/47] Use clang_cc1 and specify the target explicitly. --- clang/test/CodeGen/overflow-idiom-exclusion-fp.c | 4 ++-- clang/test/CodeGen/overflow-idiom-exclusion.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c index d21405c56beab3..f0c4f874d59f4e 100644 --- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c @@ -1,5 +1,5 @@ -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns extern unsigned a, b, c; diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c index 7c8c4af61029de..da1203c074b19f 100644 
--- a/clang/test/CodeGen/overflow-idiom-exclusion.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion.c @@ -1,8 +1,8 @@ -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - | FileCheck %s -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - | FileCheck %s -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - | FileCheck %s --check-prefix=ADD -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - | FileCheck %s --check-prefix=NEGATE -// RUN: %clang %s -O2 -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - | FileCheck %s --check-prefix=WHILE +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE +// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu 
-fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE // Ensure some common overflow-dependent or overflow-prone code patterns don't // trigger the overflow sanitizers. In many cases, overflow warnings caused by From 5873aa83b871393b5ada4c2033445d5fd52d7461 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 14 Aug 2024 17:47:47 -0700 Subject: [PATCH 15/47] Remove '-emit-llvm' and use '-triple' --- clang/test/CodeGen/overflow-idiom-exclusion-fp.c | 4 ++-- clang/test/CodeGen/overflow-idiom-exclusion.c | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c index f0c4f874d59f4e..1d26caa5b4f54b 100644 --- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns extern unsigned a, b, c; diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c 
b/clang/test/CodeGen/overflow-idiom-exclusion.c index da1203c074b19f..02dd3ef3ae42da 100644 --- a/clang/test/CodeGen/overflow-idiom-exclusion.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion.c @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE -// RUN: %clang_cc1 -target x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -o - %s | FileCheck %s --check-prefix=ADD +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow 
-fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -o - %s | FileCheck %s --check-prefix=NEGATE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -o - %s | FileCheck %s --check-prefix=WHILE // Ensure some common overflow-dependent or overflow-prone code patterns don't // trigger the overflow sanitizers. In many cases, overflow warnings caused by From 4411d1e3926d67c393e6a7bdb910bbe77507ff26 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 14 Aug 2024 18:32:29 -0700 Subject: [PATCH 16/47] [sanitizer] Remove GetCurrentThread nullness checks from Allocate The nullness check is unreachable. * For the main thread and pthread_create created threads, the `*Allocate` functions must be called after `*_current_thread` is set. * For threads created by Linux's `clone`, static TLS is either reused or set to a new value (CLONE_SETTLS). Make this change for asan/msan and possibly extend the change to other sanitizers. (asan supports many platforms and I am not 100% certain that all platforms have the property.)
Pull Request: https://github.com/llvm/llvm-project/pull/102828 --- compiler-rt/lib/asan/asan_allocator.cpp | 11 ++--------- compiler-rt/lib/msan/msan_allocator.cpp | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp index 9e66f77217ec6b..e041861edaf0b7 100644 --- a/compiler-rt/lib/asan/asan_allocator.cpp +++ b/compiler-rt/lib/asan/asan_allocator.cpp @@ -576,15 +576,8 @@ struct Allocator { } AsanThread *t = GetCurrentThread(); - void *allocated; - if (t) { - AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); - allocated = allocator.Allocate(cache, needed_size, 8); - } else { - SpinMutexLock l(&fallback_mutex); - AllocatorCache *cache = &fallback_allocator_cache; - allocated = allocator.Allocate(cache, needed_size, 8); - } + void *allocated = allocator.Allocate( + GetAllocatorCache(&t->malloc_storage()), needed_size, 8); if (UNLIKELY(!allocated)) { SetAllocatorOutOfMemory(); if (AllocatorMayReturnNull()) diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index d7d4967c949859..f478b9979f2daa 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -199,15 +199,8 @@ static void *MsanAllocate(BufferedStackTrace *stack, uptr size, uptr alignment, ReportRssLimitExceeded(stack); } MsanThread *t = GetCurrentThread(); - void *allocated; - if (t) { - AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); - allocated = allocator.Allocate(cache, size, alignment); - } else { - SpinMutexLock l(&fallback_mutex); - AllocatorCache *cache = &fallback_allocator_cache; - allocated = allocator.Allocate(cache, size, alignment); - } + void *allocated = allocator.Allocate(GetAllocatorCache(&t->malloc_storage()), + size, alignment); if (UNLIKELY(!allocated)) { SetAllocatorOutOfMemory(); if (AllocatorMayReturnNull()) From aca01bff07c225dbace6cb7743072ddfe78c43f0 Mon Sep 17 
00:00:00 2001 From: Mircea Trofin Date: Wed, 14 Aug 2024 18:46:25 -0700 Subject: [PATCH 17/47] [ctx_prof] CtxProfAnalysis: populate module data (#102930) Continuing from #102084, which introduced the analysis, we now populate it with info about functions contained in the module. When we will update the profile due to e.g. inlined callsites, we'll ingest the callee's counters and callsites to the caller. We'll move those to the caller's respective index space (counter and callers), so we need to know and maintain where those currently end. We also don't need to keep profiles not pertinent to this module. This patch also introduces an arguably much simpler way to track the GUID of a function from the frontend compilation, through ThinLTO, and into the post-thinlink compilation step, which doesn't rely on keeping names around. A separate RFC and patches will discuss extending this to the current PGO (instrumented and sampled) and other consumers as an infrastructural component. --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 60 ++++++++- llvm/lib/Analysis/CtxProfAnalysis.cpp | 93 +++++++++++++- llvm/lib/Passes/PassBuilderPipelines.cpp | 4 + llvm/lib/Passes/PassRegistry.def | 1 + .../Instrumentation/PGOCtxProfLowering.cpp | 6 +- .../Analysis/CtxProfAnalysis/full-cycle.ll | 119 ++++++++++++++++++ llvm/test/Analysis/CtxProfAnalysis/load.ll | 113 +++++++++++++---- .../PGOProfile/ctx-instrumentation.ll | 26 ++-- .../PGOProfile/ctx-prof-use-prelink.ll | 4 +- 9 files changed, 385 insertions(+), 41 deletions(-) create mode 100644 llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index d77c81d03582e1..f0e2aeb0f92f74 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -9,10 +9,10 @@ #ifndef LLVM_ANALYSIS_CTXPROFANALYSIS_H #define LLVM_ANALYSIS_CTXPROFANALYSIS_H +#include "llvm/ADT/DenseMap.h" #include 
"llvm/IR/GlobalValue.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" -#include namespace llvm { @@ -20,12 +20,28 @@ class CtxProfAnalysis; /// The instrumented contextual profile, produced by the CtxProfAnalysis. class PGOContextualProfile { + friend class CtxProfAnalysis; + friend class CtxProfAnalysisPrinterPass; + struct FunctionInfo { + uint32_t NextCounterIndex = 0; + uint32_t NextCallsiteIndex = 0; + const std::string Name; + + FunctionInfo(StringRef Name) : Name(Name) {} + }; std::optional Profiles; + // For the GUIDs in this module, associate metadata about each function which + // we'll need when we maintain the profiles during IPO transformations. + DenseMap FuncInfo; -public: - explicit PGOContextualProfile(PGOCtxProfContext::CallTargetMapTy &&Profiles) - : Profiles(std::move(Profiles)) {} + /// Get the GUID of this Function if it's defined in this module. + GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const; + + // This is meant to be constructed from CtxProfAnalysis, which will also set + // its state piecemeal. PGOContextualProfile() = default; + +public: PGOContextualProfile(const PGOContextualProfile &) = delete; PGOContextualProfile(PGOContextualProfile &&) = default; @@ -35,6 +51,20 @@ class PGOContextualProfile { return *Profiles; } + bool isFunctionKnown(const Function &F) const { + return getDefinedFunctionGUID(F) != 0; + } + + uint32_t allocateNextCounterIndex(const Function &F) { + assert(isFunctionKnown(F)); + return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++; + } + + uint32_t allocateNextCallsiteIndex(const Function &F) { + assert(isFunctionKnown(F)); + return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++; + } + bool invalidate(Module &, const PreservedAnalyses &PA, ModuleAnalysisManager::Invalidator &) { // Check whether the analysis has been explicitly invalidated. 
Otherwise, @@ -66,5 +96,27 @@ class CtxProfAnalysisPrinterPass PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); static bool isRequired() { return true; } }; + +/// Assign a GUID to functions as metadata. GUID calculation takes linkage into +/// account, which may change especially through and after thinlto. By +/// pre-computing and assigning as metadata, this mechanism is resilient to such +/// changes (as well as name changes e.g. suffix ".llvm." additions). + +// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early in +// the pass pipeline, associate it with any Global Value, and then use it for +// PGO and ThinLTO. +// At that point, this should be moved elsewhere. +class AssignGUIDPass : public PassInfoMixin { +public: + explicit AssignGUIDPass() = default; + + /// Assign a GUID *if* one is not already assign, as a function metadata named + /// `GUIDMetadataName`. + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); + static const char *GUIDMetadataName; + // This should become GlobalValue::getGUID + static uint64_t getGUID(const Function &F); +}; + } // namespace llvm #endif // LLVM_ANALYSIS_CTXPROFANALYSIS_H diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index fbae705127538a..5bf336dd311158 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -14,12 +14,14 @@ #include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/IR/Analysis.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/ProfileData/PGOCtxProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #define DEBUG_TYPE "ctx_prof" @@ -64,10 +66,39 @@ Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) { } // namespace json } // namespace llvm 
+const char *AssignGUIDPass::GUIDMetadataName = "guid"; + +PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) { + for (auto &F : M.functions()) { + if (F.isDeclaration()) + continue; + if (F.getMetadata(GUIDMetadataName)) + continue; + const GlobalValue::GUID GUID = F.getGUID(); + F.setMetadata(GUIDMetadataName, + MDNode::get(M.getContext(), + {ConstantAsMetadata::get(ConstantInt::get( + Type::getInt64Ty(M.getContext()), GUID))})); + } + return PreservedAnalyses::none(); +} + +GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) { + if (F.isDeclaration()) { + assert(GlobalValue::isExternalLinkage(F.getLinkage())); + return GlobalValue::getGUID(F.getGlobalIdentifier()); + } + auto *MD = F.getMetadata(GUIDMetadataName); + assert(MD && "guid not found for defined function"); + return cast(cast(MD->getOperand(0)) + ->getValue() + ->stripPointerCasts()) + ->getZExtValue(); +} AnalysisKey CtxProfAnalysis::Key; -CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M, - ModuleAnalysisManager &MAM) { +PGOContextualProfile CtxProfAnalysis::run(Module &M, + ModuleAnalysisManager &MAM) { ErrorOr> MB = MemoryBuffer::getFile(Profile); if (auto EC = MB.getError()) { M.getContext().emitError("could not open contextual profile file: " + @@ -81,7 +112,55 @@ CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M, toString(MaybeCtx.takeError())); return {}; } - return Result(std::move(*MaybeCtx)); + + PGOContextualProfile Result; + + for (const auto &F : M) { + if (F.isDeclaration()) + continue; + auto GUID = AssignGUIDPass::getGUID(F); + assert(GUID && "guid not found for defined function"); + const auto &Entry = F.begin(); + uint32_t MaxCounters = 0; // we expect at least a counter. 
+ for (const auto &I : *Entry) + if (auto *C = dyn_cast(&I)) { + MaxCounters = + static_cast(C->getNumCounters()->getZExtValue()); + break; + } + if (!MaxCounters) + continue; + uint32_t MaxCallsites = 0; + for (const auto &BB : F) + for (const auto &I : BB) + if (auto *C = dyn_cast(&I)) { + MaxCallsites = + static_cast(C->getNumCounters()->getZExtValue()); + break; + } + auto [It, Ins] = Result.FuncInfo.insert( + {GUID, PGOContextualProfile::FunctionInfo(F.getName())}); + (void)Ins; + assert(Ins); + It->second.NextCallsiteIndex = MaxCallsites; + It->second.NextCounterIndex = MaxCounters; + } + // If we made it this far, the Result is valid - which we mark by setting + // .Profiles. + // Trim first the roots that aren't in this module. + DenseSet ProfiledGUIDs; + for (auto &[RootGuid, _] : llvm::make_early_inc_range(*MaybeCtx)) + if (!Result.FuncInfo.contains(RootGuid)) + MaybeCtx->erase(RootGuid); + Result.Profiles = std::move(*MaybeCtx); + return Result; +} + +GlobalValue::GUID +PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const { + if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end()) + return It->first; + return 0; } PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, @@ -91,8 +170,16 @@ PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M, M.getContext().emitError("Invalid CtxProfAnalysis"); return PreservedAnalyses::all(); } + + OS << "Function Info:\n"; + for (const auto &[Guid, FuncInfo] : C.FuncInfo) + OS << Guid << " : " << FuncInfo.Name + << ". MaxCounterID: " << FuncInfo.NextCounterIndex + << ". 
MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n"; + const auto JSONed = ::llvm::json::toJSON(C.profiles()); + OS << "\nCurrent Profile:\n"; OS << formatv("{0:2}", JSONed); OS << "\n"; return PreservedAnalyses::all(); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6927a2886b962b..0201e69f3e216a 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/ProfileSummaryInfo.h" @@ -1196,6 +1197,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // In pre-link, we just want the instrumented IR. We use the contextual // profile in the post-thinlink phase. // The instrumentation will be removed in post-thinlink after IPO. + // FIXME(mtrofin): move AssignGUIDPass if there is agreement to use this + // mechanism for GUIDs. 
+ MPM.addPass(AssignGUIDPass()); if (IsCtxProfUse) return MPM; addPostPGOLoopRotation(MPM, Level); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 4fdded7b82f36b..18f4aa19224da0 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -46,6 +46,7 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA()) #endif MODULE_PASS("always-inline", AlwaysInlinerPass()) MODULE_PASS("annotation2metadata", Annotation2MetadataPass()) +MODULE_PASS("assign-guid", AssignGUIDPass()) MODULE_PASS("attributor", AttributorPass()) MODULE_PASS("attributor-light", AttributorLightPass()) MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp index d6ba12465bb328..9b10cbba84075a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp @@ -8,6 +8,7 @@ // #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" +#include "llvm/Analysis/CtxProfAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/Analysis.h" #include "llvm/IR/DiagnosticInfo.h" @@ -16,6 +17,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/CommandLine.h" #include @@ -223,8 +225,8 @@ bool CtxInstrumentationLowerer::lowerFunction(Function &F) { assert(Mark->getIndex()->isZero()); IRBuilder<> Builder(Mark); - // FIXME(mtrofin): use InstrProfSymtab::getCanonicalName - Guid = Builder.getInt64(F.getGUID()); + + Guid = Builder.getInt64(AssignGUIDPass::getGUID(F)); // The type of the context of this function is now knowable since we have // NrCallsites and NrCounters. We delcare it here because it's more // convenient - we have the Builder. 
diff --git a/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll new file mode 100644 index 00000000000000..0cdf82bd96efcb --- /dev/null +++ b/llvm/test/Analysis/CtxProfAnalysis/full-cycle.ll @@ -0,0 +1,119 @@ +; REQUIRES: x86_64-linux +; +; RUN: rm -rf %t +; RUN: split-file %s %t +; +; Test that the GUID metadata survives through thinlink. +; +; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata +; +; RUN: opt -module-summary -passes='thinlto-pre-link' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m1.bc %t/m1.ll +; RUN: opt -module-summary -passes='thinlto-pre-link' -use-ctx-profile=%t/profile.ctxprofdata -o %t/m2.bc %t/m2.ll +; +; RUN: rm -rf %t/postlink +; RUN: mkdir %t/postlink +; +; +; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc -o %t/ -thinlto-distributed-indexes \ +; RUN: -use-ctx-profile=%t/profile.ctxprofdata \ +; RUN: -r %t/m1.bc,f1,plx \ +; RUN: -r %t/m2.bc,f1 \ +; RUN: -r %t/m2.bc,entrypoint,plx +; RUN: opt --passes='function-import,require,print' \ +; RUN: -summary-file=%t/m2.bc.thinlto.bc -use-ctx-profile=%t/profile.ctxprofdata %t/m2.bc \ +; RUN: -S -o %t/m2.post.ll 2> %t/profile.txt +; RUN: diff %t/expected.txt %t/profile.txt +;--- m1.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +source_filename = "random_path/m1.cc" + +define private void @f2() #0 !guid !0 { + ret void +} + +define void @f1() #0 { + call void @f2() + ret void +} + +attributes #0 = { noinline } +!0 = !{ i64 3087265239403591524 } + +;--- m2.ll +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +source_filename = "random_path/m2.cc" + +declare void @f1() + +define void @entrypoint() { + call void @f1() + ret void +} +;--- profile.json +[ + { + "Callsites": [ + [ + { + "Callsites": [ + [ + { + "Counters": [ + 10 + ], + "Guid": 
3087265239403591524 + } + ] + ], + "Counters": [ + 7 + ], + "Guid": 2072045998141807037 + } + ] + ], + "Counters": [ + 1 + ], + "Guid": 10507721908651011566 + } +] +;--- expected.txt +Function Info: +10507721908651011566 : entrypoint. MaxCounterID: 1. MaxCallsiteID: 1 +3087265239403591524 : f2.llvm.0. MaxCounterID: 1. MaxCallsiteID: 0 +2072045998141807037 : f1. MaxCounterID: 1. MaxCallsiteID: 1 + +Current Profile: +[ + { + "Callsites": [ + [ + { + "Callsites": [ + [ + { + "Counters": [ + 10 + ], + "Guid": 3087265239403591524 + } + ] + ], + "Counters": [ + 7 + ], + "Guid": 2072045998141807037 + } + ] + ], + "Counters": [ + 1 + ], + "Guid": 10507721908651011566 + } +] diff --git a/llvm/test/Analysis/CtxProfAnalysis/load.ll b/llvm/test/Analysis/CtxProfAnalysis/load.ll index 9cd78cfef187ba..69806e334aaec9 100644 --- a/llvm/test/Analysis/CtxProfAnalysis/load.ll +++ b/llvm/test/Analysis/CtxProfAnalysis/load.ll @@ -1,16 +1,22 @@ ; REQUIRES: x86_64-linux - +; +; RUN: rm -rf %t ; RUN: split-file %s %t ; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata ; RUN: not opt -passes='require,print' \ -; RUN: %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE +; RUN: %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE ; RUN: not opt -passes='require,print' \ -; RUN: -use-ctx-profile=does_not_exist.ctxprofdata %t/empty.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE +; RUN: -use-ctx-profile=does_not_exist.ctxprofdata %t/example.ll -S 2>&1 | FileCheck %s --check-prefix=NO-FILE +; RUN: opt -module-summary -passes='thinlto-pre-link' \ +; RUN: -use-ctx-profile=%t/profile.ctxprofdata %t/example.ll -S -o %t/prelink.ll + +; RUN: opt -module-summary -passes='thinlto-pre-link' -use-ctx-profile=%t/profile.ctxprofdata \ +; RUN: %t/example.ll -S -o %t/prelink.ll ; RUN: opt -passes='require,print' \ -; RUN: -use-ctx-profile=%t/profile.ctxprofdata %t/empty.ll -S 2> %t/output.json -; RUN: diff %t/profile.json %t/output.json +; RUN: 
-use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S 2> %t/output.txt +; RUN: diff %t/expected-profile-output.txt %t/output.txt ; NO-FILE: error: could not open contextual profile file ; @@ -18,41 +24,104 @@ ; output it from opt. ;--- profile.json [ + { + "Counters": [ + 9 + ], + "Guid": 12341 + }, + { + "Counters": [ + 5 + ], + "Guid": 12074870348631550642 + }, { "Callsites": [ - [], [ { "Counters": [ - 4, - 5 + 6, + 7 ], - "Guid": 2000 - }, + "Guid": 728453322856651412 + } + ] + ], + "Counters": [ + 1 + ], + "Guid": 11872291593386833696 + } +] +;--- expected-profile-output.txt +Function Info: +4909520559318251808 : an_entrypoint. MaxCounterID: 2. MaxCallsiteID: 1 +12074870348631550642 : another_entrypoint_no_callees. MaxCounterID: 1. MaxCallsiteID: 0 +11872291593386833696 : foo. MaxCounterID: 1. MaxCallsiteID: 1 + +Current Profile: +[ + { + "Callsites": [ + [ { "Counters": [ 6, - 7, - 8 + 7 ], - "Guid": 18446744073709551613 + "Guid": 728453322856651412 } ] ], "Counters": [ - 1, - 2, - 3 + 1 ], - "Guid": 1000 + "Guid": 11872291593386833696 }, { "Counters": [ - 5, - 9, - 10 + 5 ], - "Guid": 18446744073709551612 + "Guid": 12074870348631550642 } ] -;--- empty.ll +;--- example.ll +declare void @bar() + +define private void @foo(i32 %a, ptr %fct) #0 !guid !0 { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no +yes: + call void %fct(i32 %a) + br label %exit +no: + call void @bar() + br label %exit +exit: + ret void +} + +define void @an_entrypoint(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + call void @foo(i32 1, ptr null) + ret void +no: + ret void +} + +define void @another_entrypoint_no_callees(i32 %a) { + %t = icmp eq i32 %a, 0 + br i1 %t, label %yes, label %no + +yes: + ret void +no: + ret void +} + +attributes #0 = { noinline } +!0 = !{ i64 11872291593386833696 } \ No newline at end of file diff --git a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll 
index 56c7c7519f6945..a70f94e1521f0d 100644 --- a/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll +++ b/llvm/test/Transforms/PGOProfile/ctx-instrumentation.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4 ; RUN: opt -passes=pgo-instr-gen -profile-context-root=an_entrypoint \ ; RUN: -S < %s | FileCheck --check-prefix=INSTRUMENT %s -; RUN: opt -passes=pgo-instr-gen,ctx-instr-lower -profile-context-root=an_entrypoint \ +; RUN: opt -passes=pgo-instr-gen,assign-guid,ctx-instr-lower -profile-context-root=an_entrypoint \ ; RUN: -profile-context-root=another_entrypoint_no_callees \ ; RUN: -S < %s | FileCheck --check-prefix=LOWERING %s @@ -46,7 +46,7 @@ define void @foo(i32 %a, ptr %fct) { ; INSTRUMENT-NEXT: ret void ; ; LOWERING-LABEL: define void @foo( -; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) { +; LOWERING-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) !guid [[META0:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @foo, i64 6699318081062747564, i32 2, i32 2) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 @@ -104,7 +104,7 @@ define void @an_entrypoint(i32 %a) { ; INSTRUMENT-NEXT: ret void ; ; LOWERING-LABEL: define void @an_entrypoint( -; LOWERING-SAME: i32 [[A:%.*]]) { +; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META1:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_start_context(ptr @an_entrypoint_ctx_root, i64 4909520559318251808, i32 2, i32 1) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 @@ -154,7 +154,7 @@ define void @another_entrypoint_no_callees(i32 %a) { ; INSTRUMENT-NEXT: ret void ; ; LOWERING-LABEL: define void @another_entrypoint_no_callees( -; LOWERING-SAME: i32 [[A:%.*]]) { +; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META2:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr 
@__llvm_ctx_profile_start_context(ptr @another_entrypoint_no_callees_ctx_root, i64 -6371873725078000974, i32 2, i32 0) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2 @@ -188,7 +188,7 @@ define void @simple(i32 %a) { ; INSTRUMENT-NEXT: ret void ; ; LOWERING-LABEL: define void @simple( -; LOWERING-SAME: i32 [[A:%.*]]) { +; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META3:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @simple, i64 -3006003237940970099, i32 1, i32 0) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2 @@ -212,7 +212,7 @@ define i32 @no_callsites(i32 %a) { ; INSTRUMENT-NEXT: ret i32 0 ; ; LOWERING-LABEL: define i32 @no_callsites( -; LOWERING-SAME: i32 [[A:%.*]]) { +; LOWERING-SAME: i32 [[A:%.*]]) !guid [[META4:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_callsites, i64 5679753335911435902, i32 2, i32 0) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], -2 @@ -243,7 +243,8 @@ define void @no_counters() { ; INSTRUMENT-NEXT: call void @bar() ; INSTRUMENT-NEXT: ret void ; -; LOWERING-LABEL: define void @no_counters() { +; LOWERING-LABEL: define void @no_counters( +; LOWERING-SAME: ) !guid [[META5:![0-9]+]] { ; LOWERING-NEXT: [[TMP1:%.*]] = call ptr @__llvm_ctx_profile_get_context(ptr @no_counters, i64 5458232184388660970, i32 1, i32 1) ; LOWERING-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; LOWERING-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1 @@ -263,8 +264,15 @@ define void @no_counters() { ret void } ;. -; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind } -;. ; LOWERING: attributes #[[ATTR0:[0-9]+]] = { nounwind } ; LOWERING: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
+; INSTRUMENT: attributes #[[ATTR0:[0-9]+]] = { nounwind } +;. +; LOWERING: [[META0]] = !{i64 6699318081062747564} +; LOWERING: [[META1]] = !{i64 4909520559318251808} +; LOWERING: [[META2]] = !{i64 -6371873725078000974} +; LOWERING: [[META3]] = !{i64 -3006003237940970099} +; LOWERING: [[META4]] = !{i64 5679753335911435902} +; LOWERING: [[META5]] = !{i64 5458232184388660970} +;. diff --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll index 18ac2f92aa39d4..cb8ab78dc0f414 100644 --- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll +++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll @@ -12,7 +12,7 @@ declare void @bar() ;. define void @foo(i32 %a, ptr %fct) { ; CHECK-LABEL: define void @foo( -; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr { +; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr !guid [[META0:![0-9]+]] { ; CHECK-NEXT: call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0) ; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[A]], 0 ; CHECK-NEXT: br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]] @@ -42,3 +42,5 @@ exit: ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind } ;. +; CHECK: [[META0]] = !{i64 6699318081062747564} +;. From bd47ba705607033c3dab0037a8d64159b58bedb4 Mon Sep 17 00:00:00 2001 From: Jessica Clarke Date: Thu, 15 Aug 2024 02:50:07 +0100 Subject: [PATCH 18/47] [ELF][NFC] Allow non-GotSection for addAddendOnlyRelocIfNonPreemptible (#104228) This was done as an afterthought in c3c9e4531287 without justification. Nothing relies on it being a specific kind of section, and downstream in CHERI LLVM we pass a non-GotSection to this function. Thus revert this overly-restrictive change and allow downstreams to pass other section types again. This partially reverts commit c3c9e45312874ff890723f54cabfd41e43b2dbc4. 
--- lld/ELF/SyntheticSections.cpp | 6 +++--- lld/ELF/SyntheticSections.h | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index c27ab2b67dc2b2..7d26fa9aea74ab 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1639,14 +1639,14 @@ void RelocationBaseSection::addSymbolReloc( } void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( - RelType dynType, GotSection &sec, uint64_t offsetInSec, Symbol &sym, + RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType) { // No need to write an addend to the section for preemptible symbols. if (sym.isPreemptible) - addReloc({dynType, &sec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, + addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); else - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, sec, offsetInSec, + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec, sym, 0, R_ABS, addendRelType); } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index d4169e1e1acaf6..43eb82cbb3e28b 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -523,7 +523,8 @@ class RelocationBaseSection : public SyntheticSection { } /// Add a dynamic relocation using the target address of \p sym as the addend /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym. 
- void addAddendOnlyRelocIfNonPreemptible(RelType dynType, GotSection &sec, + void addAddendOnlyRelocIfNonPreemptible(RelType dynType, + InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType); template From 0df91893efc752a76c7bbe6b063d66c8a2fa0d55 Mon Sep 17 00:00:00 2001 From: alx32 <103613512+alx32@users.noreply.github.com> Date: Wed, 14 Aug 2024 19:30:41 -0700 Subject: [PATCH 19/47] [lld-macho] Fix crash: ObjC category merge + relative method lists (#104081) A crash was happening when both ObjC Category Merging and Relative method lists were enabled. ObjC Category Merging creates new data sections and adds them by calling `addInputSection`. `addInputSection` uses the symbols within the added section to determine which container to actually add the section to. The issue is that ObjC Category merging is calling `addInputSection` before actually adding the relevant symbols to the added section. This causes `addInputSection` to add the `InputSection` to the wrong container, eventually resulting in a crash. To fix this, we ensure that ObjC Category Merging calls `addInputSection` only after the symbols have been added to the `InputSection`. 
--- lld/MachO/ObjC.cpp | 10 +++++----- .../MachO/objc-category-merging-minimal.s | 20 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 18b7521ed1ad2a..b9f7592fa9c663 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -851,7 +851,6 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList( infoCategoryWriter.catPtrListInfo.align); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; listSec->live = true; - addInputSection(listSec); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; @@ -867,6 +866,7 @@ Defined *ObjcCategoryMerger::emitAndLinkProtocolList( ptrListSym->used = true; parentSym->getObjectFile()->symbols.push_back(ptrListSym); + addInputSection(listSec); createSymbolReference(parentSym, ptrListSym, linkAtOffset, infoCategoryWriter.catBodyInfo.relocTemplate); @@ -911,7 +911,6 @@ void ObjcCategoryMerger::emitAndLinkPointerList( infoCategoryWriter.catPtrListInfo.align); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; listSec->live = true; - addInputSection(listSec); listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; @@ -927,6 +926,7 @@ void ObjcCategoryMerger::emitAndLinkPointerList( ptrListSym->used = true; parentSym->getObjectFile()->symbols.push_back(ptrListSym); + addInputSection(listSec); createSymbolReference(parentSym, ptrListSym, linkAtOffset, infoCategoryWriter.catBodyInfo.relocTemplate); @@ -952,7 +952,6 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, bodyData, infoCategoryWriter.catListInfo.align); newCatList->parent = infoCategoryWriter.catListInfo.outputSection; newCatList->live = true; - addInputSection(newCatList); newCatList->parent = infoCategoryWriter.catListInfo.outputSection; @@ -968,6 +967,7 @@ ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, catListSym->used = true; objFile->symbols.push_back(catListSym); + 
addInputSection(newCatList); return catListSym; } @@ -990,7 +990,6 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, bodyData, infoCategoryWriter.catBodyInfo.align); newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; newBodySec->live = true; - addInputSection(newBodySec); std::string symName = objc::symbol_names::category + baseClassName + "(" + name + ")"; @@ -1003,6 +1002,7 @@ Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, catBodySym->used = true; objFile->symbols.push_back(catBodySym); + addInputSection(newBodySec); createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, infoCategoryWriter.catBodyInfo.relocTemplate); @@ -1223,7 +1223,6 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories( infoCategoryWriter.catListInfo.align); listSec->parent = infoCategoryWriter.catListInfo.outputSection; listSec->live = true; - addInputSection(listSec); std::string slotSymName = "<__objc_catlist slot for category "; slotSymName += nonErasedCatBody->getName(); @@ -1238,6 +1237,7 @@ void ObjcCategoryMerger::generateCatListForNonErasedCategories( catListSlotSym->used = true; objFile->symbols.push_back(catListSlotSym); + addInputSection(listSec); // Now link the category body into the newly created slot createSymbolReference(catListSlotSym, nonErasedCatBody, 0, diff --git a/lld/test/MachO/objc-category-merging-minimal.s b/lld/test/MachO/objc-category-merging-minimal.s index 527493303c583e..b94799a57a4d85 100644 --- a/lld/test/MachO/objc-category-merging-minimal.s +++ b/lld/test/MachO/objc-category-merging-minimal.s @@ -9,7 +9,7 @@ ## Create our main testing dylib - linking against the fake dylib above # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_cat_minimal.o merge_cat_minimal.s # RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_no_merge.dylib a64_fakedylib.dylib merge_cat_minimal.o -# RUN: %lld -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging 
a64_fakedylib.dylib merge_cat_minimal.o +# RUN: %lld -objc_relative_method_lists -arch arm64 -dylib -o merge_cat_minimal_merge.dylib -objc_category_merging a64_fakedylib.dylib merge_cat_minimal.o ## Now verify that the flag caused category merging to happen appropriatelly # RUN: llvm-objdump --objc-meta-data --macho merge_cat_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_CATS @@ -17,7 +17,7 @@ ############ Test merging multiple categories into the base class ############ # RUN: llvm-mc -filetype=obj -triple=arm64-apple-macos -o merge_base_class_minimal.o merge_base_class_minimal.s -# RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o +# RUN: %lld -arch arm64 -dylib -objc_relative_method_lists -o merge_base_class_minimal_yes_merge.dylib -objc_category_merging merge_base_class_minimal.o merge_cat_minimal.o # RUN: %lld -arch arm64 -dylib -o merge_base_class_minimal_no_merge.dylib merge_base_class_minimal.o merge_cat_minimal.o # RUN: llvm-objdump --objc-meta-data --macho merge_base_class_minimal_no_merge.dylib | FileCheck %s --check-prefixes=NO_MERGE_INTO_BASE @@ -37,14 +37,14 @@ MERGE_CATS-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 MERGE_CATS: __OBJC_$_CATEGORY_MyBaseClass(Category01|Category02) MERGE_CATS-NEXT: name {{.*}} Category01|Category02 MERGE_CATS: instanceMethods -MERGE_CATS-NEXT: 24 -MERGE_CATS-NEXT: 2 +MERGE_CATS-NEXT: entsize 12 (relative) +MERGE_CATS-NEXT: count 2 MERGE_CATS-NEXT: name {{.*}} cat01_InstanceMethod MERGE_CATS-NEXT: types {{.*}} v16@0:8 -MERGE_CATS-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod] +MERGE_CATS-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod] MERGE_CATS-NEXT: name {{.*}} cat02_InstanceMethod MERGE_CATS-NEXT: types {{.*}} v16@0:8 -MERGE_CATS-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod] +MERGE_CATS-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod] MERGE_CATS-NEXT: 
classMethods 0x0 MERGE_CATS-NEXT: protocols 0x0 MERGE_CATS-NEXT: instanceProperties 0x0 @@ -69,17 +69,17 @@ YES_MERGE_INTO_BASE-NOT: __OBJC_$_CATEGORY_MyBaseClass_$_Category02 YES_MERGE_INTO_BASE: _OBJC_CLASS_$_MyBaseClass YES_MERGE_INTO_BASE-NEXT: _OBJC_METACLASS_$_MyBaseClass YES_MERGE_INTO_BASE: baseMethods -YES_MERGE_INTO_BASE-NEXT: entsize 24 +YES_MERGE_INTO_BASE-NEXT: entsize 12 (relative) YES_MERGE_INTO_BASE-NEXT: count 3 YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat01_InstanceMethod YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 -YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category01) cat01_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category01) cat01_InstanceMethod] YES_MERGE_INTO_BASE-NEXT: name {{.*}} cat02_InstanceMethod YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 -YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass(Category02) cat02_InstanceMethod] +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass(Category02) cat02_InstanceMethod] YES_MERGE_INTO_BASE-NEXT: name {{.*}} baseInstanceMethod YES_MERGE_INTO_BASE-NEXT: types {{.*}} v16@0:8 -YES_MERGE_INTO_BASE-NEXT: imp -[MyBaseClass baseInstanceMethod] +YES_MERGE_INTO_BASE-NEXT: imp {{.*}} -[MyBaseClass baseInstanceMethod] #### Check merge swift category into base class ### From abaa53199ed03b2e9de9fd373cbcfcc88e5348ff Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Thu, 15 Aug 2024 10:37:04 +0800 Subject: [PATCH 20/47] [RISCV] Implement RISCVTTIImpl::shouldConsiderAddressTypePromotion for RISCV (#102560) This optimization helps reduce repeated calculations of base addresses by extracting type extensions when the same base address is accessed multiple times but its offset is a constant. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 1 + .../Target/RISCV/RISCVTargetTransformInfo.cpp | 32 +++++++ .../Target/RISCV/RISCVTargetTransformInfo.h | 4 +- .../RISCV/riscv-codegen-prepare-atp.ll | 95 +++++++++++++++++++ 4 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 02f48d41b56b3c..911fa45d7173e8 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1428,6 +1428,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Disable strict node mutation. IsStrictFPEnabled = true; + EnableExtLdPromotion = true; // Let the subtarget decide if a predictable select is more expensive than the // corresponding branch. This information is used in CGP/SelectOpt to decide diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 85683c62064435..781e3d7929aa43 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -2000,3 +2000,35 @@ bool RISCVTTIImpl::areInlineCompatible(const Function *Caller, // target-features. return (CallerBits & CalleeBits) == CalleeBits; } + +/// See if \p I should be considered for address type promotion. We check if \p +/// I is a sext with right type and used in memory accesses. If it used in a +/// "complex" getelementptr, we allow it to be promoted without finding other +/// sext instructions that sign extended the same initial value. A getelementptr +/// is considered as "complex" if it has more than 2 operands. 
+bool RISCVTTIImpl::shouldConsiderAddressTypePromotion( + const Instruction &I, bool &AllowPromotionWithoutCommonHeader) { + bool Considerable = false; + AllowPromotionWithoutCommonHeader = false; + if (!isa(&I)) + return false; + Type *ConsideredSExtType = + Type::getInt64Ty(I.getParent()->getParent()->getContext()); + if (I.getType() != ConsideredSExtType) + return false; + // See if the sext is the one with the right type and used in at least one + // GetElementPtrInst. + for (const User *U : I.users()) { + if (const GetElementPtrInst *GEPInst = dyn_cast(U)) { + Considerable = true; + // A getelementptr is considered as "complex" if it has more than 2 + // operands. We will promote a SExt used in such complex GEP as we + // expect some computation to be merged if they are done on 64 bits. + if (GEPInst->getNumOperands() > 2) { + AllowPromotionWithoutCommonHeader = true; + break; + } + } + } + return Considerable; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 9c37a4f6ec2d04..f5eca2839acd05 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -397,7 +397,9 @@ class RISCVTTIImpl : public BasicTTIImplBase { bool shouldFoldTerminatingConditionAfterLSR() const { return true; } - + bool + shouldConsiderAddressTypePromotion(const Instruction &I, + bool &AllowPromotionWithoutCommonHeader); std::optional getMinPageSize() const { return 4096; } }; diff --git a/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll new file mode 100644 index 00000000000000..b733c6a1c787ba --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-codegen-prepare-atp.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -passes='require,function(codegenprepare)' < %s -S | FileCheck %s + +target datalayout = 
"e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "riscv64" + +%struct.match_state = type { i64, i64 } + +; %add is also promoted by forking an extra sext. +define void @promoteTwoOne(i32 %i, i32 %j, ptr %P1, ptr %P2 ) { +; CHECK-LABEL: define void @promoteTwoOne( +; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S2:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[PROMOTED2:%.*]] = sext i32 [[J]] to i64 +; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[S2]], [[PROMOTED2]] +; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]] +; CHECK-NEXT: store i64 [[S]], ptr [[ADDR1]], align 8 +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]] +; CHECK-NEXT: store i64 [[S2]], ptr [[ADDR2]], align 8 +; CHECK-NEXT: ret void +; +entry: + %add = add nsw i32 %i, %j + %s = sext i32 %add to i64 + %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s + store i64 %s, ptr %addr1 + %s2 = sext i32 %i to i64 + %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2 + store i64 %s2, ptr %addr2 + ret void +} + +; Both %add1 and %add2 are promoted by forking extra sexts. 
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, ptr %P1, ptr %P2) { +; CHECK-LABEL: define void @promoteTwoTwo( +; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]], i32 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PROMOTED3:%.*]] = sext i32 [[J]] to i64 +; CHECK-NEXT: [[PROMOTED4:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED4]] +; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[S]] +; CHECK-NEXT: store i64 [[S]], ptr [[ADDR1]], align 8 +; CHECK-NEXT: [[PROMOTED2:%.*]] = sext i32 [[K]] to i64 +; CHECK-NEXT: [[S2:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]] +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[S2]] +; CHECK-NEXT: store i64 [[S2]], ptr [[ADDR2]], align 8 +; CHECK-NEXT: ret void +; +entry: + %add1 = add nsw i32 %j, %i + %s = sext i32 %add1 to i64 + %addr1 = getelementptr inbounds i64, ptr %P1, i64 %s + store i64 %s, ptr %addr1 + %add2 = add nsw i32 %j, %k + %s2 = sext i32 %add2 to i64 + %addr2 = getelementptr inbounds i64, ptr %P2, i64 %s2 + store i64 %s2, ptr %addr2 + ret void +} + +define i64 @promoteGEPSunk(i1 %cond, ptr %base, i32 %i) { +; CHECK-LABEL: define i64 @promoteGEPSunk( +; CHECK-SAME: i1 [[COND:%.*]], ptr [[BASE:%.*]], i32 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[PROMOTED1:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[S:%.*]] = add nsw i64 [[PROMOTED1]], 1 +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S]] +; CHECK-NEXT: [[S2:%.*]] = add nsw i64 [[PROMOTED1]], 2 +; CHECK-NEXT: [[ADDR2:%.*]] = getelementptr inbounds i64, ptr [[BASE]], i64 [[S2]] +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_THEN2:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[ADDR]], align 8 +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[ADDR2]], align 8 +; CHECK-NEXT: [[R:%.*]] = add i64 [[V]], [[V2]] +; CHECK-NEXT: ret i64 [[R]] +; CHECK: if.then2: +; 
CHECK-NEXT: ret i64 0 +; +entry: + %add = add nsw i32 %i, 1 + %s = sext i32 %add to i64 + %addr = getelementptr inbounds i64, ptr %base, i64 %s + %add2 = add nsw i32 %i, 2 + %s2 = sext i32 %add2 to i64 + %addr2 = getelementptr inbounds i64, ptr %base, i64 %s2 + br i1 %cond, label %if.then, label %if.then2 +if.then: + %v = load i64, ptr %addr + %v2 = load i64, ptr %addr2 + %r = add i64 %v, %v2 + ret i64 %r +if.then2: + ret i64 0; +} From b57038a611329ec42858b714effb482cbfc4d4e1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 14 Aug 2024 20:03:45 -0700 Subject: [PATCH 21/47] [OpenMP] Use range-based for loops (NFC) (#103511) --- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 83fec194d73904..f9b070e6f1eae4 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -2022,8 +2022,8 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc, Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; }); } - llvm::for_each(llvm::reverse(ToBeDeleted), - [](Instruction *I) { I->eraseFromParent(); }); + for (Instruction *I : llvm::reverse(ToBeDeleted)) + I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); @@ -7049,8 +7049,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask( } StaleCI->eraseFromParent(); - llvm::for_each(llvm::reverse(ToBeDeleted), - [](Instruction *I) { I->eraseFromParent(); }); + for (Instruction *I : llvm::reverse(ToBeDeleted)) + I->eraseFromParent(); }; addOutlineInfo(std::move(OI)); @@ -8345,9 +8345,8 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, omp::RuntimeFunction::OMPRTL___kmpc_fork_teams), Args); - llvm::for_each(llvm::reverse(ToBeDeleted), - [](Instruction *I) { I->eraseFromParent(); }); - + for (Instruction *I : llvm::reverse(ToBeDeleted)) + I->eraseFromParent(); }; if 
(!Config.isTargetDevice()) From b4dc9869381f91af419ec170837ac324d09525e5 Mon Sep 17 00:00:00 2001 From: Daniel Wedzicha <55595431+boredhuman@users.noreply.github.com> Date: Thu, 15 Aug 2024 00:23:40 -0400 Subject: [PATCH 22/47] [LLDB][OSX] Removed semi colon generating a warning during build (#104398) Singular warning I noticed when compiling lldb. Co-authored-by: Daniel --- lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index b714f7be187aca..d27bd1b7426e6c 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -128,7 +128,7 @@ static void ParseOSVersion(llvm::VersionTuple &version, NSString *Key) { static bool ResolveAndVerifyCandidateSupportDir(FileSpec &path) { FileSystem::Instance().Resolve(path); return FileSystem::Instance().IsDirectory(path); -}; +} bool HostInfoMacOSX::ComputeSupportExeDirectory(FileSpec &file_spec) { FileSpec lldb_file_spec = GetShlibDir(); From e0d173d44161bf9b68243845666d58999e74f759 Mon Sep 17 00:00:00 2001 From: Max Winkler Date: Wed, 14 Aug 2024 21:51:57 -0700 Subject: [PATCH 23/47] [Clang] [AST] Fix placeholder return type name mangling for MSVC 1920+ / VS2019+ (#102848) Partial fix for https://github.com/llvm/llvm-project/issues/92204. This PR just fixes VS2019+ since that is the suite of compilers that I require link compatibility with at the moment. I still intend to fix VS2017 and to update llvm-undname in future PRs. Once those are also finished and merged I'll close out https://github.com/llvm/llvm-project/issues/92204. I am hoping to get the llvm-undname PR up in a couple of weeks to be able to demangle the VS2019+ name mangling. MSVC 1920+ mangles placeholder return types for non-templated functions with "@". For example `auto foo() { return 0; }` is mangled as `?foo@@YA@XZ`. 
MSVC 1920+ mangles placeholder return types for templated functions as the qualifiers of the AutoType followed by "_P" for `auto` and "_T" for `decltype(auto)`. For example `template<class T> auto foo() { return 0; }` is mangled as `??$foo@H@@YA?A_PXZ` when `foo` is instantiated as follows `foo<int>()`. Lambdas with placeholder return types are still mangled with clang's custom mangling since MSVC lambda mangling hasn't been deciphered yet. Similarly any pointers in the return type with an address space are mangled with clang's custom mangling since that is a clang extension. We cannot augment `mangleType` to support this mangling scheme as the mangling schemes for variables and functions differ. auto variables are encoded with the fully deduced type whereas auto return types are not. The following two functions with a static variable are mangled the same ``` template<class T> int test() { static int i = 0; // "?i@?1???$test@H@@YAHXZ@4HA" return i; } template<class T> int test() { static auto i = 0; // "?i@?1???$test@H@@YAHXZ@4HA" return i; } ``` Inside `mangleType` once we get to mangling the `AutoType` we have no way to tell whether we came from a variable encoding or some other encoding. Therefore it was easier to handle any special casing for `AutoType` return types with a separate function instead of using the `mangleType` infrastructure. 
--- clang/docs/ReleaseNotes.rst | 2 + clang/lib/AST/MicrosoftMangle.cpp | 161 +++++++- .../test/CodeGenCXX/mangle-ms-auto-return.cpp | 369 ++++++++++++++++++ .../mangle-ms-auto-templates-memptrs.cpp | 12 +- .../mangle-ms-auto-templates-nullptr.cpp | 2 +- .../CodeGenCXX/mangle-ms-auto-templates.cpp | 6 +- 6 files changed, 533 insertions(+), 19 deletions(-) create mode 100644 clang/test/CodeGenCXX/mangle-ms-auto-return.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f5696d6ce15dc7..b1864901e7bddb 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -77,6 +77,8 @@ C++ Specific Potentially Breaking Changes ABI Changes in This Version --------------------------- +- Fixed Microsoft name mangling of placeholder, auto and decltype(auto), return types for MSVC 1920+. This change resolves incompatibilities with code compiled by MSVC 1920+ but will introduce incompatibilities with code compiled by earlier versions of Clang unless such code is built with the compiler option -fms-compatibility-version=19.14 to imitate the MSVC 1914 mangling behavior. 
+ AST Dumping Potentially Breaking Changes ---------------------------------------- diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index ed8d1cf1b98dd8..a113574675b4c5 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -408,6 +408,8 @@ class MicrosoftCXXNameMangler { void mangleSourceName(StringRef Name); void mangleNestedName(GlobalDecl GD); + void mangleAutoReturnType(QualType T, QualifierMangleMode QMM); + private: bool isStructorDecl(const NamedDecl *ND) const { return ND == Structor || getStructor(ND) == Structor; @@ -477,6 +479,11 @@ class MicrosoftCXXNameMangler { SourceRange Range); void mangleObjCKindOfType(const ObjCObjectType *T, Qualifiers Quals, SourceRange Range); + + void mangleAutoReturnType(const MemberPointerType *T, Qualifiers Quals); + void mangleAutoReturnType(const PointerType *T, Qualifiers Quals); + void mangleAutoReturnType(const LValueReferenceType *T, Qualifiers Quals); + void mangleAutoReturnType(const RValueReferenceType *T, Qualifiers Quals); }; } @@ -2494,6 +2501,57 @@ void MicrosoftCXXNameMangler::mangleAddressSpaceType(QualType T, mangleArtificialTagType(TagTypeKind::Struct, ASMangling, {"__clang"}); } +void MicrosoftCXXNameMangler::mangleAutoReturnType(QualType T, + QualifierMangleMode QMM) { + assert(getASTContext().getLangOpts().isCompatibleWithMSVC( + LangOptions::MSVC2019) && + "Cannot mangle MSVC 2017 auto return types!"); + + if (isa(T)) { + const auto *AT = T->getContainedAutoType(); + Qualifiers Quals = T.getLocalQualifiers(); + + if (QMM == QMM_Result) + Out << '?'; + if (QMM != QMM_Drop) + mangleQualifiers(Quals, false); + Out << (AT->isDecltypeAuto() ? 
"_T" : "_P"); + return; + } + + T = T.getDesugaredType(getASTContext()); + Qualifiers Quals = T.getLocalQualifiers(); + + switch (QMM) { + case QMM_Drop: + case QMM_Result: + break; + case QMM_Mangle: + mangleQualifiers(Quals, false); + break; + default: + llvm_unreachable("QMM_Escape unexpected"); + } + + const Type *ty = T.getTypePtr(); + switch (ty->getTypeClass()) { + case Type::MemberPointer: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::Pointer: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::LValueReference: + mangleAutoReturnType(cast(ty), Quals); + break; + case Type::RValueReference: + mangleAutoReturnType(cast(ty), Quals); + break; + default: + llvm_unreachable("Invalid type expected"); + } +} + void MicrosoftCXXNameMangler::mangleType(QualType T, SourceRange Range, QualifierMangleMode QMM) { // Don't use the canonical types. MSVC includes things like 'const' on @@ -2907,17 +2965,51 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // can differ by their calling convention and are typically deduced. So // we make sure that this type gets mangled properly. mangleType(ResultType, Range, QMM_Result); - } else if (const auto *AT = dyn_cast_or_null( - ResultType->getContainedAutoType())) { - Out << '?'; - mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); - Out << '?'; + } else if (IsInLambda) { + if (const auto *AT = ResultType->getContainedAutoType()) { + assert(AT->getKeyword() == AutoTypeKeyword::Auto && + "should only need to mangle auto!"); + Out << '?'; + mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); + Out << '?'; + mangleSourceName(""); + Out << '@'; + } else { + Out << '@'; + } + } else if (const auto *AT = ResultType->getContainedAutoType()) { assert(AT->getKeyword() != AutoTypeKeyword::GNUAutoType && "shouldn't need to mangle __auto_type!"); - mangleSourceName(AT->isDecltypeAuto() ? 
"" : ""); - Out << '@'; - } else if (IsInLambda) { - Out << '@'; + + // If we have any pointer types with the clang address space extension + // then defer to the custom clang mangling to keep backwards + // compatibility. See `mangleType(const PointerType *T, Qualifiers Quals, + // SourceRange Range)` for details. + auto UseClangMangling = [](QualType ResultType) { + QualType T = ResultType; + while (const auto *PT = dyn_cast(T.getTypePtr())) { + T = T->getPointeeType(); + if (T.getQualifiers().hasAddressSpace()) + return true; + } + return false; + }; + + if (getASTContext().getLangOpts().isCompatibleWithMSVC( + LangOptions::MSVC2019) && + !UseClangMangling(ResultType)) { + if (D && !D->getPrimaryTemplate()) { + Out << '@'; + } else { + mangleAutoReturnType(ResultType, QMM_Result); + } + } else { + Out << '?'; + mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); + Out << '?'; + mangleSourceName(AT->isDecltypeAuto() ? "" : ""); + Out << '@'; + } } else { if (ResultType->isVoidType()) ResultType = ResultType.getUnqualifiedType(); @@ -4220,6 +4312,57 @@ void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral *SL, Mangler.getStream() << '@'; } +void MicrosoftCXXNameMangler::mangleAutoReturnType(const MemberPointerType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + manglePointerCVQualifiers(Quals); + manglePointerExtQualifiers(Quals, PointeeType); + if (const FunctionProtoType *FPT = PointeeType->getAs()) { + Out << '8'; + mangleName(T->getClass()->castAs()->getDecl()); + mangleFunctionType(FPT, nullptr, true); + } else { + mangleQualifiers(PointeeType.getQualifiers(), true); + mangleName(T->getClass()->castAs()->getDecl()); + mangleAutoReturnType(PointeeType, QMM_Drop); + } +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const PointerType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!PointeeType.getQualifiers().hasAddressSpace() && + "Unexpected address space 
mangling required"); + + manglePointerCVQualifiers(Quals); + manglePointerExtQualifiers(Quals, PointeeType); + + if (const FunctionProtoType *FPT = PointeeType->getAs()) { + Out << '6'; + mangleFunctionType(FPT); + } else { + mangleAutoReturnType(PointeeType, QMM_Mangle); + } +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const LValueReferenceType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); + Out << 'A'; + manglePointerExtQualifiers(Quals, PointeeType); + mangleAutoReturnType(PointeeType, QMM_Mangle); +} + +void MicrosoftCXXNameMangler::mangleAutoReturnType(const RValueReferenceType *T, + Qualifiers Quals) { + QualType PointeeType = T->getPointeeType(); + assert(!Quals.hasConst() && !Quals.hasVolatile() && "unexpected qualifier!"); + Out << "$$Q"; + manglePointerExtQualifiers(Quals, PointeeType); + mangleAutoReturnType(PointeeType, QMM_Mangle); +} + MicrosoftMangleContext *MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine &Diags, bool IsAux) { diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp new file mode 100644 index 00000000000000..737c9c407f4703 --- /dev/null +++ b/clang/test/CodeGenCXX/mangle-ms-auto-return.cpp @@ -0,0 +1,369 @@ +// RUN: %clang_cc1 -std=c++17 -fms-compatibility-version=19.20 -emit-llvm %s -o - -fms-extensions -fdelayed-template-parsing -triple=x86_64-pc-windows-msvc | FileCheck %s + +struct StructA {}; + +template +auto AutoT() { return T(); } + +template +const auto AutoConstT() { return T(); } + +template +volatile auto AutoVolatileT() { return T(); } + +template +const volatile auto AutoConstVolatileT() { return T(); } + +// The qualifiers of the return type should always be emitted even for void types. +// Void types usually have their qualifers stripped in the mangled name for MSVC ABI. 
+void test_template_auto_void() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@X@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBX@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CCX@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CDX@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@X@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@X@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@X@@YA?D_PXZ" +} + +void test_template_auto_int() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@H@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CCH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CDH@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@H@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@H@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@H@@YA?D_PXZ" +} + +void test_template_auto_struct() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@UStructA@@@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@$$CBUStructA@@@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@UStructA@@@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@UStructA@@@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@UStructA@@@@YA?D_PXZ" +} + +void test_template_auto_ptr() { + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@PEAH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@PEBH@@YA?A_PXZ" + + AutoT(); + // CHECK: call {{.*}} @"??$AutoT@QEBH@@YA?A_PXZ" + + AutoConstT(); + // CHECK: call {{.*}} @"??$AutoConstT@PEAH@@YA?B_PXZ" + + AutoVolatileT(); + // CHECK: call {{.*}} @"??$AutoVolatileT@PEAH@@YA?C_PXZ" + + AutoConstVolatileT(); + // CHECK: call {{.*}} @"??$AutoConstVolatileT@PEAH@@YA?D_PXZ" +} + +template +auto* PtrAutoT() 
{ return T(); } + +template +const auto* PtrAutoConstT() { return T(); } + +template +volatile auto* PtrAutoVolatileT() { return T(); } + +template +const volatile auto* PtrAutoConstVolatileT() { return T(); } + +void test_template_ptr_auto() { + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@PEAH@@YAPEA_PXZ" + + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@PEBH@@YAPEA_PXZ" + + PtrAutoT(); + // CHECK: call {{.*}} @"??$PtrAutoT@QEBH@@YAPEA_PXZ" + + PtrAutoConstT(); + // CHECK: call {{.*}} @"??$PtrAutoConstT@PEAH@@YAPEB_PXZ" + + PtrAutoVolatileT(); + // CHECK: call {{.*}} @"??$PtrAutoVolatileT@PEAH@@YAPEC_PXZ" + + PtrAutoConstVolatileT(); + // CHECK: call {{.*}} @"??$PtrAutoConstVolatileT@PEAH@@YAPED_PXZ" +} + +int func_int(); +const int func_constint(); +void func_void(); +int* func_intptr(); + +template +auto (*FuncPtrAutoT())() { return v; } + +void test_template_func_ptr_auto() { + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AHXZ$1?func_int@@YAHXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6A?BHXZ$1?func_constint@@YA?BHXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6AXXZ$1?func_void@@YAXXZ@@YAP6A?A_PXZXZ" + + FuncPtrAutoT(); + // CHECK: call {{.*}} @"??$FuncPtrAutoT@P6APEAHXZ$1?func_intptr@@YAPEAHXZ@@YAP6A?A_PXZXZ" +} + +template +auto& RefAutoT(T& x) { return x; } + +template +const auto& ConstRefAutoT(T& x) { return x; } + +template +auto&& RRefAutoT(T& x) { return static_cast(x); } + +void test_template_ref_auto() { + int x; + + RefAutoT(x); + // CHECK: call {{.*}} @"??$RefAutoT@H@@YAAEA_PAEAH@Z" + + ConstRefAutoT(x); + // CHECK: call {{.*}} @"??$ConstRefAutoT@H@@YAAEB_PAEAH@Z" + + RRefAutoT(x); + // CHECK: call {{.*}} @"??$RRefAutoT@H@@YA$$QEA_PAEAH@Z" +} + +template +decltype(auto) DecltypeAutoT() { return T(); } + +template +decltype(auto) DecltypeAutoT2(T& x) { return static_cast(x); } + +void test_template_decltypeauto() { + DecltypeAutoT(); + // CHECK: call 
{{.*}} @"??$DecltypeAutoT@X@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDX@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@H@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CBH@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CCH@@YA?A_TXZ" + + DecltypeAutoT(); + // CHECK: call {{.*}} @"??$DecltypeAutoT@$$CDH@@YA?A_TXZ" + + int x; + + DecltypeAutoT2(x); + // CHECK: call {{.*}} @"??$DecltypeAutoT2@H@@YA?A_TAEAH@Z" +} + +// Still want to use clang's custom mangling for lambdas to keep backwards compatibility until +// MSVC lambda name mangling has been deciphered. +void test_lambda() { + auto lambdaIntRetAuto = []() { return 0; }; + lambdaIntRetAuto(); + // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA?A?@@XZ" + + auto lambdaIntRet = []() -> int { return 0; }; + lambdaIntRet(); + // CHECK: call {{.*}} @"??R@?0??test_lambda@@YAXXZ@QEBA@XZ" + + auto lambdaGenericIntIntRetAuto = [](auto a) { return a; }; + lambdaGenericIntIntRetAuto(0); + // CHECK: call {{.*}} @"??$?RH@@?0??test_lambda@@YAXXZ@QEBA?A?@@H@Z" +} + +auto TestTrailingInt() -> int { + return 0; +} + +auto TestTrailingConstVolatileVoid() -> const volatile void { +} + +auto TestTrailingStructA() -> StructA { + return StructA{}; +} + +void test_trailing_return() { + TestTrailingInt(); + // CHECK: call {{.*}} @"?TestTrailingInt@@YAHXZ" + + TestTrailingConstVolatileVoid(); + // CHECK: call {{.*}} @"?TestTrailingConstVolatileVoid@@YAXXZ" + + TestTrailingStructA(); + // CHECK: call {{.*}} @"?TestTrailingStructA@@YA?AUStructA@@XZ" +} + +auto TestNonTemplateAutoInt() { + return 0; +} + +auto TestNonTemplateAutoVoid() { + return; +} + +auto TestNonTemplateAutoStructA() { + return StructA{}; +} + +const auto TestNonTemplateConstAutoInt() 
{ + return 0; +} + +const auto TestNonTemplateConstAutoVoid() { + return; +} + +const auto TestNonTemplateConstAutoStructA() { + return StructA{}; +} + +void test_nontemplate_auto() { + TestNonTemplateAutoInt(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoInt@@YA@XZ" + + TestNonTemplateAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoVoid@@YA@XZ" + + TestNonTemplateAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateAutoStructA@@YA@XZ" + + TestNonTemplateConstAutoInt(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoInt@@YA@XZ" + + TestNonTemplateConstAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoVoid@@YA@XZ" + + TestNonTemplateConstAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateConstAutoStructA@@YA@XZ" +} + +decltype(auto) TestNonTemplateDecltypeAutoInt() { + return 0; +} + +decltype(auto) TestNonTemplateDecltypeAutoVoid() { + return; +} + +decltype(auto) TestNonTemplateDecltypeAutoStructA() { + return StructA{}; +} + +void test_nontemplate_decltypeauto() { + TestNonTemplateDecltypeAutoInt(); + // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoInt@@YA@XZ" + + TestNonTemplateDecltypeAutoVoid(); + // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoVoid@@YA@XZ" + + TestNonTemplateDecltypeAutoStructA(); + // CHECK: call {{.*}} @"?TestNonTemplateDecltypeAutoStructA@@YA@XZ" +} + +struct StructB { + int x; +}; + +template +auto StructB::* AutoMemberDataPtrT(T x) { return x; } + +template +const auto StructB::* AutoConstMemberDataPtrT(T x) { return x; } + +void test_template_auto_member_data_ptr() { + AutoMemberDataPtrT(&StructB::x); + // CHECK: call {{.*}} @"??$AutoMemberDataPtrT@PEQStructB@@H@@YAPEQStructB@@_PPEQ0@H@Z" + + AutoConstMemberDataPtrT(&StructB::x); + // CHECK: call {{.*}} @"??$AutoConstMemberDataPtrT@PEQStructB@@H@@YAPERStructB@@_PPEQ0@H@Z" +} + +struct StructC { + void test() {} +}; + +struct StructD { + const int test() { return 0; } +}; + +template +auto (StructC::*AutoMemberFuncPtrT(T x))() { return x; } 
+ +template +const auto (StructD::*AutoConstMemberFuncPtrT(T x))() { return x; } + +void test_template_auto_member_func_ptr() { + AutoMemberFuncPtrT(&StructC::test); + // CHECK: call {{.*}} @"??$AutoMemberFuncPtrT@P8StructC@@EAAXXZ@@YAP8StructC@@EAA?A_PXZP80@EAAXXZ@Z" + + AutoConstMemberFuncPtrT(&StructD::test); + // CHECK: call {{.*}} @"??$AutoConstMemberFuncPtrT@P8StructD@@EAA?BHXZ@@YAP8StructD@@EAA?B_PXZP80@EAA?BHXZ@Z" +} + +template +auto * __attribute__((address_space(1))) * AutoPtrAddressSpaceT() { + T * __attribute__((address_space(1))) * p = nullptr; + return p; +} + +void test_template_auto_address_space_ptr() { + AutoPtrAddressSpaceT(); + // CHECK: call {{.*}} @"??$AutoPtrAddressSpaceT@H@@YA?A?@@XZ" +} diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp index 360ebdecc5562b..b7bc3953f0b438 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-memptrs.cpp @@ -34,15 +34,15 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$I?f@V@@QEAAXXZA@A@@@QEAA@XZ" AutoFunc<&S::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8S@@EAAXXZ1?f@1@QEAAXXZ@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?f@S@@QEAAXXZ@@YA?A?@@XZ" AutoFunc<&M::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8M@@EAAXXZH?f@1@QEAAXXZA@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$H?f@M@@QEAAXXZA@@@YA?A?@@XZ" AutoFunc<&V::f>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP8V@@EAAXXZI?f@1@QEAAXXZA@A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$I?f@V@@QEAAXXZA@A@@@YA?A?@@XZ" AutoParmTemplate<&S::a> auto_data_single_inheritance; @@ -58,14 +58,14 @@ void template_mangling() { // BEFORE: call 
{{.*}} @"??0?$AutoParmTemplate@$FBA@A@@@QEAA@XZ" AutoFunc<&S::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQS@@H07@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$07@@YA?A?@@XZ" AutoFunc<&M::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQM@@H0M@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0M@@@YA?A?@@XZ" AutoFunc<&V::a>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEQV@@HFBA@A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$FBA@A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp index 8f98c1e59f73d7..251d9219c01ce2 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates-nullptr.cpp @@ -19,6 +19,6 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmTemplate@$0A@@@QEAA@XZ" AutoFunc(); - // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$M$$T0A@@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$0A@@@YA?A?@@XZ" } diff --git a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp index ff5395cea75eb7..effcc31ee31103 100644 --- a/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp +++ b/clang/test/CodeGenCXX/mangle-ms-auto-templates.cpp @@ -26,7 +26,7 @@ int j; void template_mangling() { AutoFunc<1>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MH00@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$00@@YA?A?@@XZ" AutoParmTemplate<0> auto_int; // AFTER: call {{.*}} @"??0?$AutoParmTemplate@$MH0A@@@QEAA@XZ" @@ -52,7 +52,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$00$0HPPPPPPPPPPPPPPP@@@QEAA@XZ" AutoFunc<&i>(); - // 
AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MPEAH1?i@@3HA@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?i@@3HA@@YA?A?@@XZ" AutoParmTemplate<&i> auto_int_ptr; @@ -64,7 +64,7 @@ void template_mangling() { // BEFORE: call {{.*}} @"??0?$AutoParmsTemplate@$1?i@@3HA$1?j@@3HA@@QEAA@XZ" AutoFunc<&Func>(); - // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A?@@XZ" + // AFTER: call {{.*}} @"??$AutoFunc@$MP6AHXZ1?Func@@YAHXZ@@YA?A_PXZ" // BEFORE: call {{.*}} @"??$AutoFunc@$1?Func@@YAHXZ@@YA?A?@@XZ" AutoParmTemplate<&Func> auto_func_ptr; From 6e2d9df02502e16659e4a9397260baf9df224f17 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 14 Aug 2024 22:14:29 -0700 Subject: [PATCH 24/47] Fix testcases. Use -emit-llvm and not -S. Use LABEL checking. --- clang/test/CodeGen/overflow-idiom-exclusion-fp.c | 4 ++-- clang/test/CodeGen/overflow-idiom-exclusion.c | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c index 1d26caa5b4f54b..511a88cc7a2836 100644 --- a/clang/test/CodeGen/overflow-idiom-exclusion-fp.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion-fp.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all 
-fwrapv -emit-llvm -o - %s | FileCheck %s // Check for potential false positives from patterns that _almost_ match classic overflow-dependent or overflow-prone code patterns extern unsigned a, b, c; diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c index 02dd3ef3ae42da..c4756a0b80f61e 100644 --- a/clang/test/CodeGen/overflow-idiom-exclusion.c +++ b/clang/test/CodeGen/overflow-idiom-exclusion.c @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -S -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -S -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -S -o - %s | FileCheck %s --check-prefix=ADD -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -S -o - %s | FileCheck %s --check-prefix=NEGATE -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -S -o - %s | FileCheck %s --check-prefix=WHILE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow 
-fsanitize-overflow-pattern-exclusion=add-overflow-test -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=post-decr-while -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE // Ensure some common overflow-dependent or overflow-prone code patterns don't // trigger the overflow sanitizers. In many cases, overflow warnings caused by @@ -32,6 +32,7 @@ // NEGATE: usub.with.overflow // NEGATE-NOT: negate_overflow +// WHILE-LABEL: @basic_commutativity // WHILE: handler.add_overflow // WHILE: negate_overflow // WHILE-NOT: usub.with.overflow @@ -57,6 +58,7 @@ void basic_commutativity(void) { c = 9; } +// WHILE-LABEL: @arguments_and_commutativity void arguments_and_commutativity(unsigned V1, unsigned V2) { if (V1 + V2 < V1) c = 9; From 94b8b11ac305ebe730e6b70b2463811de395cb40 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 15 Aug 2024 05:27:19 +0000 Subject: [PATCH 25/47] [Clang][NFC] Move FindCountedByField into FieldDecl (#104235) FindCountedByField can be used in more places than CodeGen. Move it into FieldDecl to avoid layering issues. 
--- clang/include/clang/AST/Decl.h | 4 ++++ clang/lib/AST/Decl.cpp | 13 +++++++++++++ clang/lib/CodeGen/CGBuiltin.cpp | 2 +- clang/lib/CodeGen/CGExpr.cpp | 18 +----------------- clang/lib/CodeGen/CodeGenFunction.h | 4 ---- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 561a9d872acfb0..6d84bd03de810a 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -3206,6 +3206,10 @@ class FieldDecl : public DeclaratorDecl, public Mergeable { /// Set the C++11 in-class initializer for this member. void setInClassInitializer(Expr *NewInit); + /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns + /// \p nullptr if either the attribute or the field doesn't exist. + const FieldDecl *findCountedByField() const; + private: void setLazyInClassInitializer(LazyDeclStmtPtr NewInit); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index e125143bc1b270..90caf81757ac96 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -4678,6 +4678,19 @@ void FieldDecl::printName(raw_ostream &OS, const PrintingPolicy &Policy) const { DeclaratorDecl::printName(OS, Policy); } +const FieldDecl *FieldDecl::findCountedByField() const { + const auto *CAT = getType()->getAs(); + if (!CAT) + return nullptr; + + const auto *CountDRE = cast(CAT->getCountExpr()); + const auto *CountDecl = CountDRE->getDecl(); + if (const auto *IFD = dyn_cast(CountDecl)) + CountDecl = IFD->getAnonField(); + + return dyn_cast(CountDecl); +} + //===----------------------------------------------------------------------===// // TagDecl Implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b5e5240e55be3f..1c0baeaee03632 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -987,7 +987,7 @@ 
CodeGenFunction::emitFlexibleArrayMemberSize(const Expr *E, unsigned Type, // attribute. return nullptr; - const FieldDecl *CountedByFD = FindCountedByField(FAMDecl); + const FieldDecl *CountedByFD = FAMDecl->findCountedByField(); if (!CountedByFD) // Can't find the field referenced by the "counted_by" attribute. return nullptr; diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f93f8dda0bd29a..0672861790633b 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1150,22 +1150,6 @@ llvm::Value *CodeGenFunction::EmitLoadOfCountedByField( getIntAlign(), "..counted_by.load"); } -const FieldDecl *CodeGenFunction::FindCountedByField(const FieldDecl *FD) { - if (!FD) - return nullptr; - - const auto *CAT = FD->getType()->getAs(); - if (!CAT) - return nullptr; - - const auto *CountDRE = cast(CAT->getCountExpr()); - const auto *CountDecl = CountDRE->getDecl(); - if (const auto *IFD = dyn_cast(CountDecl)) - CountDecl = IFD->getAnonField(); - - return dyn_cast(CountDecl); -} - void CodeGenFunction::EmitBoundsCheck(const Expr *E, const Expr *Base, llvm::Value *Index, QualType IndexType, bool Accessed) { @@ -4305,7 +4289,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, ME->isFlexibleArrayMemberLike(getContext(), StrictFlexArraysLevel) && ME->getMemberDecl()->getType()->isCountAttributedType()) { const FieldDecl *FAMDecl = dyn_cast(ME->getMemberDecl()); - if (const FieldDecl *CountFD = FindCountedByField(FAMDecl)) { + if (const FieldDecl *CountFD = FAMDecl->findCountedByField()) { if (std::optional Diff = getOffsetDifferenceInBits(*this, CountFD, FAMDecl)) { CharUnits OffsetDiff = CGM.getContext().toCharUnitsFromBits(*Diff); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 19a7feeb69d820..57e0b7f91e9bf8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3305,10 +3305,6 @@ class CodeGenFunction : public 
CodeGenTypeCache { const FieldDecl *FAMDecl, uint64_t &Offset); - /// Find the FieldDecl specified in a FAM's "counted_by" attribute. Returns - /// \p nullptr if either the attribute or the field doesn't exist. - const FieldDecl *FindCountedByField(const FieldDecl *FD); - /// Build an expression accessing the "counted_by" field. llvm::Value *EmitLoadOfCountedByField(const Expr *Base, const FieldDecl *FAMDecl, From 07a8cbaf8dc16bebf6e875173d20299d9cc47cc5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 14 Aug 2024 22:51:08 -0700 Subject: [PATCH 26/47] Remove failing test until it can be fixed properly. --- clang/test/CodeGen/overflow-idiom-exclusion.c | 153 ------------------ 1 file changed, 153 deletions(-) delete mode 100644 clang/test/CodeGen/overflow-idiom-exclusion.c diff --git a/clang/test/CodeGen/overflow-idiom-exclusion.c b/clang/test/CodeGen/overflow-idiom-exclusion.c deleted file mode 100644 index c4756a0b80f61e..00000000000000 --- a/clang/test/CodeGen/overflow-idiom-exclusion.c +++ /dev/null @@ -1,153 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=all -fwrapv -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=add-overflow-test -emit-llvm -o - %s | FileCheck %s --check-prefix=ADD -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fsanitize-overflow-pattern-exclusion=negated-unsigned-const -emit-llvm -o - %s | FileCheck %s --check-prefix=NEGATE -// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsanitize=signed-integer-overflow,unsigned-integer-overflow 
-fsanitize-overflow-pattern-exclusion=post-decr-while -emit-llvm -o - %s | FileCheck %s --check-prefix=WHILE - -// Ensure some common overflow-dependent or overflow-prone code patterns don't -// trigger the overflow sanitizers. In many cases, overflow warnings caused by -// these patterns are seen as "noise" and result in users turning off -// sanitization all together. - -// A pattern like "if (a + b < a)" simply checks for overflow and usually means -// the user is trying to handle it gracefully. - -// Similarly, a pattern resembling "while (i--)" is extremely common and -// warning on its inevitable overflow can be seen as superfluous. Do note that -// using "i" in future calculations can be tricky because it will still -// wrap-around. - -// Another common pattern that, in some cases, is found to be too noisy is -// unsigned negation, for example: -// unsigned long A = -1UL; - - -// CHECK-NOT: handle{{.*}}overflow - -// ADD: usub.with.overflow -// ADD: negate_overflow -// ADD-NOT: handler.add_overflow - -// NEGATE: handler.add_overflow -// NEGATE: usub.with.overflow -// NEGATE-NOT: negate_overflow - -// WHILE-LABEL: @basic_commutativity -// WHILE: handler.add_overflow -// WHILE: negate_overflow -// WHILE-NOT: usub.with.overflow -extern unsigned a, b, c; -extern unsigned some(void); - -void basic_commutativity(void) { - if (a + b < a) - c = 9; - if (a + b < b) - c = 9; - if (b + a < b) - c = 9; - if (b + a < a) - c = 9; - if (a > a + b) - c = 9; - if (a > b + a) - c = 9; - if (b > a + b) - c = 9; - if (b > b + a) - c = 9; -} - -// WHILE-LABEL: @arguments_and_commutativity -void arguments_and_commutativity(unsigned V1, unsigned V2) { - if (V1 + V2 < V1) - c = 9; - if (V1 + V2 < V2) - c = 9; - if (V2 + V1 < V2) - c = 9; - if (V2 + V1 < V1) - c = 9; - if (V1 > V1 + V2) - c = 9; - if (V1 > V2 + V1) - c = 9; - if (V2 > V1 + V2) - c = 9; - if (V2 > V2 + V1) - c = 9; -} - -void pointers(unsigned *P1, unsigned *P2, unsigned V1) { - if (*P1 + *P2 < *P1) - c = 9; - if 
(*P1 + V1 < V1) - c = 9; - if (V1 + *P2 < *P2) - c = 9; -} - -struct OtherStruct { - unsigned foo, bar; -}; - -struct MyStruct { - unsigned base, offset; - struct OtherStruct os; -}; - -extern struct MyStruct ms; - -void structs(void) { - if (ms.base + ms.offset < ms.base) - c = 9; -} - -void nestedstructs(void) { - if (ms.os.foo + ms.os.bar < ms.os.foo) - c = 9; -} - -// Normally, this would be folded into a simple call to the overflow handler -// and a store. Excluding this pattern results in just a store. -void constants(void) { - unsigned base = 4294967295; - unsigned offset = 1; - if (base + offset < base) - c = 9; -} - -void common_while(unsigned i) { - // This post-decrement usually causes overflow sanitizers to trip on the very - // last operation. - while (i--) { - some(); - } -} - -// Normally, these assignments would trip the unsigned overflow sanitizer. -void negation(void) { -#define SOME -1UL - unsigned long A = -1UL; - unsigned long B = -2UL; - unsigned long C = -3UL; - unsigned long D = -SOME; - (void)A;(void)B;(void)C;(void)D; -} - -// cvise'd kernel code that caused problems during development due to sign -// extension -typedef unsigned long _size_t; -int qnbytes; -int *key_alloc_key; -_size_t key_alloc_quotalen; -int *key_alloc(void) { - if (qnbytes + key_alloc_quotalen < qnbytes) - return key_alloc_key; - return key_alloc_key + 3;; -} - -void function_call(void) { - if (b + some() < b) - c = 9; -} From fb9e685fc41b8abc87725f8509624b3a80330dee Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Thu, 15 Aug 2024 14:09:36 +0800 Subject: [PATCH 27/47] Intrinsic: introduce minimumnum and maximumnum for IR and SelectionDAG (#96649) C23 introduced new functions fminimum_num and fmaximum_num, and they follow the minimumNumber and maximumNumber of IEEE754-2019. Let's introduce new intrinsics to support them. This patch introduces support only support for scalar values. 
Support for vectors (vp, vp.reduce, vector.reduce) and experimental.constrained will be added in future patches. With this patch, MIPSr6 and LoongArch can work out of the box with fcanonical and fmax/fmin. Aarch64/PowerPC64 can use the same logic as MIPSr6 and LoongArch, while they have no fcanonical support yet. I will add it in future patches. The FMIN/FMAX instructions of RISC-V follow the minimumNumber/maximumNumber of IEEE754-2019. We can just add it in a future patch. Background https://discourse.llvm.org/t/rfc-fix-llvm-min-f-and-llvm-max-f-intrinsics/79735 Currently we have fminnum/fmaxnum, which have different behavior on different platforms for NUM vs sNaN: 1) Fallback to fmin(3)/fmax(3): return qNaN. 2) ARM64/ARM32+Neon: same as libc. 3) MIPSr6/LoongArch/RISC-V: return NUM. And the fix of fminnum/fmaxnum to follow minNUM/maxNUM of IEEE754-2008 will be submitted as separate patches. --- llvm/docs/LangRef.rst | 182 ++++++++ .../llvm/Analysis/TargetLibraryInfo.def | 33 ++ llvm/include/llvm/CodeGen/BasicTTIImpl.h | 6 + llvm/include/llvm/CodeGen/ISDOpcodes.h | 5 + llvm/include/llvm/CodeGen/TargetLowering.h | 5 + llvm/include/llvm/IR/IRBuilder.h | 12 + llvm/include/llvm/IR/IntrinsicInst.h | 2 + llvm/include/llvm/IR/Intrinsics.td | 8 + llvm/include/llvm/IR/RuntimeLibcalls.def | 10 + .../include/llvm/Target/TargetSelectionDAG.td | 4 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 + .../SelectionDAG/LegalizeFloatTypes.cpp | 42 ++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 + .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 24 + .../SelectionDAG/SelectionDAGDumper.cpp | 2 + .../CodeGen/SelectionDAG/TargetLowering.cpp | 88 ++++ llvm/lib/CodeGen/TargetLoweringBase.cpp | 1 + .../LoongArch/fp-maximumnum-minimumnum.ll | 431 ++++++++++++++++++ .../CodeGen/Mips/fp-maximumnum-minimumnum.ll | 132 ++++++ .../tools/llvm-tli-checker/ps4-tli-check.yaml | 16 +- 
.../Analysis/TargetLibraryInfoTest.cpp | 6 + 23 files changed, 1036 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll create mode 100644 llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 0ee4d7b444cfcf..5e5e9b9e8a93b1 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -16131,6 +16131,96 @@ The returned value is completely identical to the input except for the sign bit; in particular, if the input is a NaN, then the quiet/signaling bit and payload are perfectly preserved. +.. _i_fminmax_family: + +'``llvm.min.*``' Intrinsics Comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Standard: +""""""""" + +IEEE754 and ISO C define some min/max operations, and they have some differences +in how they handle qNaN/sNaN and +0.0/-0.0. Here is the list: + +.. list-table:: + :header-rows: 2 + + * - ``ISO C`` + - fmin/fmax + - fminimum/fmaximum + - fminimum_num/fmaximum_num + + * - ``IEEE754`` + - minNum/maxNum (2008) + - minimum/maximum (2019) + - minimumNumber/maximumNumber (2019) + + * - ``+0.0 vs -0.0`` + - either one + - +0.0 > -0.0 + - +0.0 > -0.0 + + * - ``NUM vs sNaN`` + - qNaN, invalid exception + - qNaN, invalid exception + - NUM, invalid exception + + * - ``qNaN vs sNaN`` + - qNaN, invalid exception + - qNaN, invalid exception + - qNaN, invalid exception + + * - ``NUM vs qNaN`` + - NUM, no exception + - qNaN, no exception + - NUM, no exception + +LLVM Implementation: +"""""""""""""""""""" + +LLVM implements all ISO C flavors as listed in this table, except that in the +default floating-point environment, exceptions are ignored. The constrained +versions of the intrinsics respect the exception behavior. + +.. 
list-table:: + :header-rows: 1 + :widths: 16 28 28 28 + + * - Operation + - minnum/maxnum + - minimum/maximum + - minimumnum/maximumnum + + * - ``NUM vs qNaN`` + - NUM, no exception + - qNaN, no exception + - NUM, no exception + + * - ``NUM vs sNaN`` + - qNaN, invalid exception + - qNaN, invalid exception + - NUM, invalid exception + + * - ``qNaN vs sNaN`` + - qNaN, invalid exception + - qNaN, invalid exception + - qNaN, invalid exception + + * - ``sNaN vs sNaN`` + - qNaN, invalid exception + - qNaN, invalid exception + - qNaN, invalid exception + + * - ``+0.0 vs -0.0`` + - either one + - +0.0(max)/-0.0(min) + - +0.0(max)/-0.0(min) + + * - ``NUM vs NUM`` + - larger(max)/smaller(min) + - larger(max)/smaller(min) + - larger(max)/smaller(min) + .. _i_minnum: '``llvm.minnum.*``' Intrinsic @@ -16312,6 +16402,98 @@ of the two arguments. -0.0 is considered to be less than +0.0 for this intrinsic. Note that these are the semantics specified in the draft of IEEE 754-2019. +.. _i_minimumnum: + +'``llvm.minimumnum.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.minimumnum`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.minimumnum.f32(float %Val0, float %Val1) + declare double @llvm.minimumnum.f64(double %Val0, double %Val1) + declare x86_fp80 @llvm.minimumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1) + declare fp128 @llvm.minimumnum.f128(fp128 %Val0, fp128 %Val1) + declare ppc_fp128 @llvm.minimumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1) + +Overview: +""""""""" + +The '``llvm.minimumnum.*``' intrinsics return the minimum of the two +arguments, not propagating NaNs and treating -0.0 as less than +0.0. + + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same +type. + +Semantics: +"""""""""" +If both operands are NaNs (including sNaN), returns qNaN. 
If one operand +is NaN (including sNaN) and the other operand is a number, returns the number. +Otherwise returns the lesser of the two arguments. -0.0 is considered to +be less than +0.0 for this intrinsic. + +Note that these are the semantics of minimumNumber specified in IEEE 754-2019. + +It has some differences with '``llvm.minnum.*``': +1)'``llvm.minnum.*``' will return qNaN if either operand is sNaN. +2)'``llvm.minnum.*``' may return either one if we compare +0.0 vs -0.0. + +.. _i_maximumnum: + +'``llvm.maximumnum.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.maximumnum`` on any +floating-point or vector of floating-point type. Not all targets support +all types however. + +:: + + declare float @llvm.maximumnum.f32(float %Val0, float %Val1) + declare double @llvm.maximumnum.f64(double %Val0, double %Val1) + declare x86_fp80 @llvm.maximumnum.f80(x86_fp80 %Val0, x86_fp80 %Val1) + declare fp128 @llvm.maximumnum.f128(fp128 %Val0, fp128 %Val1) + declare ppc_fp128 @llvm.maximumnum.ppcf128(ppc_fp128 %Val0, ppc_fp128 %Val1) + +Overview: +""""""""" + +The '``llvm.maximumnum.*``' intrinsics return the maximum of the two +arguments, not propagating NaNs and treating -0.0 as less than +0.0. + + +Arguments: +"""""""""" + +The arguments and return value are floating-point numbers of the same +type. + +Semantics: +"""""""""" +If both operands are NaNs (including sNaN), returns qNaN. If one operand +is NaN (including sNaN) and the other operand is a number, returns the number. +Otherwise returns the greater of the two arguments. -0.0 is considered to +be less than +0.0 for this intrinsic. + +Note that these are the semantics of maximumNumber specified in IEEE 754-2019. + +It has some differences with '``llvm.maxnum.*``': +1)'``llvm.maxnum.*``' will return qNaN if either operand is sNaN. +2)'``llvm.maxnum.*``' may return either one if we compare +0.0 vs -0.0. + +.. 
_int_copysign: '``llvm.copysign.*``' Intrinsic diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 7be5bb04549c61..e1cb1e5c557eae 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -1388,6 +1388,39 @@ TLI_DEFINE_ENUM_INTERNAL(fminl) TLI_DEFINE_STRING_INTERNAL("fminl") TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) +// Calls to fmaximum_num and fminimum_num library functions expand to the llvm.maximumnum and +// llvm.minimumnum intrinsics with the correct parameter types for the arguments +// (all types must match). +/// double fmaximum_num(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(fmaximum_num) +TLI_DEFINE_STRING_INTERNAL("fmaximum_num") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + +/// float fmaximum_numf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(fmaximum_numf) +TLI_DEFINE_STRING_INTERNAL("fmaximum_numf") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + +/// long double fmaximum_numl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(fmaximum_numl) +TLI_DEFINE_STRING_INTERNAL("fmaximum_numl") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + +/// double fminimum_num(double x, double y); +TLI_DEFINE_ENUM_INTERNAL(fminimum_num) +TLI_DEFINE_STRING_INTERNAL("fminimum_num") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + +/// float fminimum_numf(float x, float y); +TLI_DEFINE_ENUM_INTERNAL(fminimum_numf) +TLI_DEFINE_STRING_INTERNAL("fminimum_numf") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + +/// long double fminimum_numl(long double x, long double y); +TLI_DEFINE_ENUM_INTERNAL(fminimum_numl) +TLI_DEFINE_STRING_INTERNAL("fminimum_numl") +TLI_DEFINE_SIG_INTERNAL(Floating, Same, Same) + /// double fmod(double x, double y); TLI_DEFINE_ENUM_INTERNAL(fmod) TLI_DEFINE_STRING_INTERNAL("fmod") diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 
279cfb5aa47d6f..77ddc10e8a0e76 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -2037,6 +2037,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { case Intrinsic::maximum: ISD = ISD::FMAXIMUM; break; + case Intrinsic::minimumnum: + ISD = ISD::FMINIMUMNUM; + break; + case Intrinsic::maximumnum: + ISD = ISD::FMAXIMUMNUM; + break; case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index 7305e3086fcd65..b8f8818a749528 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1047,6 +1047,11 @@ enum NodeType { FMINIMUM, FMAXIMUM, + /// FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as + /// FMINNUM_IEEE and FMAXNUM_IEEE except when either operand is sNaN. + FMINIMUMNUM, + FMAXIMUMNUM, + /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index deb1d04df3400c..eda38cd8a564d6 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2908,6 +2908,8 @@ class TargetLoweringBase { case ISD::FMAXNUM_IEEE: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::AVGFLOORS: case ISD::AVGFLOORU: case ISD::AVGCEILS: @@ -5283,6 +5285,9 @@ class TargetLowering : public TargetLoweringBase { /// Expand fminimum/fmaximum into multiple comparison with selects. SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand fminimumnum/fmaximumnum into multiple comparison with selects. + SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const; + /// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max. 
/// \param N Node to expand /// \returns The expansion result diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 3f3d75012c6945..0dbcbc0b2cb76f 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1015,6 +1015,18 @@ class IRBuilderBase { return CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS, nullptr, Name); } + /// Create call to the minimumnum intrinsic. + Value *CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name = "") { + return CreateBinaryIntrinsic(Intrinsic::minimumnum, LHS, RHS, nullptr, + Name); + } + + /// Create call to the maximumnum intrinsic. + Value *CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name = "") { + return CreateBinaryIntrinsic(Intrinsic::maximumnum, LHS, RHS, nullptr, + Name); + } + /// Create call to the copysign intrinsic. Value *CreateCopySign(Value *LHS, Value *RHS, Instruction *FMFSource = nullptr, diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 94c8fa092f45e6..2f1e2c08c3ecec 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -76,6 +76,8 @@ class IntrinsicInst : public CallInst { case Intrinsic::minnum: case Intrinsic::maximum: case Intrinsic::minimum: + case Intrinsic::maximumnum: + case Intrinsic::minimumnum: case Intrinsic::smax: case Intrinsic::smin: case Intrinsic::umax: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index b4e758136b39fb..0841273fd2e1e5 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1085,6 +1085,14 @@ def int_maximum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] >; +def int_minimumnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] +>; +def int_maximumnum : 
DefaultAttrsIntrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative] +>; // Internal interface for object size checking def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty], diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def index 89aaf6d1ad83f8..c3d5ef9f4e4f82 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -299,6 +299,16 @@ HANDLE_LIBCALL(FMAX_F64, "fmax") HANDLE_LIBCALL(FMAX_F80, "fmaxl") HANDLE_LIBCALL(FMAX_F128, "fmaxl") HANDLE_LIBCALL(FMAX_PPCF128, "fmaxl") +HANDLE_LIBCALL(FMINIMUMNUM_F32, "fminimum_numf") +HANDLE_LIBCALL(FMINIMUMNUM_F64, "fminimum_num") +HANDLE_LIBCALL(FMINIMUMNUM_F80, "fminimum_numl") +HANDLE_LIBCALL(FMINIMUMNUM_F128, "fminimum_numl") +HANDLE_LIBCALL(FMINIMUMNUM_PPCF128, "fminimum_numl") +HANDLE_LIBCALL(FMAXIMUMNUM_F32, "fmaximum_numf") +HANDLE_LIBCALL(FMAXIMUMNUM_F64, "fmaximum_num") +HANDLE_LIBCALL(FMAXIMUMNUM_F80, "fmaximum_numl") +HANDLE_LIBCALL(FMAXIMUMNUM_F128, "fmaximum_numl") +HANDLE_LIBCALL(FMAXIMUMNUM_PPCF128, "fmaximum_numl") HANDLE_LIBCALL(LROUND_F32, "lroundf") HANDLE_LIBCALL(LROUND_F64, "lround") HANDLE_LIBCALL(LROUND_F80, "lroundl") diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td index e40ad2062166ea..172deffbd31771 100644 --- a/llvm/include/llvm/Target/TargetSelectionDAG.td +++ b/llvm/include/llvm/Target/TargetSelectionDAG.td @@ -517,6 +517,10 @@ def fminimum : SDNode<"ISD::FMINIMUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def fmaximum : SDNode<"ISD::FMAXIMUM" , SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; +def fminimumnum : SDNode<"ISD::FMINIMUMNUM" , SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def fmaximumnum : SDNode<"ISD::FMAXIMUMNUM" , SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>; def fcanonicalize : 
SDNode<"ISD::FCANONICALIZE", SDTFPUnaryOp>; def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0aa8b82f533f2a..25644c24855a62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1939,7 +1939,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMINNUM: case ISD::FMAXNUM: case ISD::FMINIMUM: - case ISD::FMAXIMUM: return visitFMinMax(N); + case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: return visitFMinMax(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::FFREXP: return visitFFREXP(N); @@ -6068,6 +6070,7 @@ static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1); } +// FIXME: use FMINIMUMNUM if possible, such as for RISC-V. static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, ISD::CondCode CC, unsigned OrAndOpcode, SelectionDAG &DAG, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3eadfbf51ddaa1..e7f765382b0e46 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3660,6 +3660,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Expanded); break; } + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: { + Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG)); + break; + } case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); @@ -4539,6 +4544,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128, Results); break; + case ISD::FMINIMUMNUM: + ExpandFPLibCall(Node, RTLIB::FMINIMUMNUM_F32, RTLIB::FMINIMUMNUM_F64, + RTLIB::FMINIMUMNUM_F80, RTLIB::FMINIMUMNUM_F128, + RTLIB::FMINIMUMNUM_PPCF128, 
Results); + break; + case ISD::FMAXIMUMNUM: + ExpandFPLibCall(Node, RTLIB::FMAXIMUMNUM_F32, RTLIB::FMAXIMUMNUM_F64, + RTLIB::FMAXIMUMNUM_F80, RTLIB::FMAXIMUMNUM_F128, + RTLIB::FMAXIMUMNUM_PPCF128, Results); + break; case ISD::FSQRT: case ISD::STRICT_FSQRT: ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, @@ -5464,6 +5479,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: case ISD::FPOW: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 0c881d81a2c639..ad0c054d3ccd50 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -74,6 +74,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::FMINIMUMNUM: R = SoftenFloatRes_FMINIMUMNUM(N); break; + case ISD::FMAXIMUMNUM: R = SoftenFloatRes_FMAXIMUMNUM(N); break; case ISD::STRICT_FADD: case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::STRICT_FACOS: @@ -323,6 +325,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { RTLIB::FMAX_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINIMUMNUM(SDNode *N) { + return SoftenFloatRes_Binary( + N, GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32, + RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80, + RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXIMUMNUM(SDNode *N) { + return SoftenFloatRes_Binary( + N, GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32, + RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80, + 
RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, @@ -1404,6 +1420,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; case ISD::STRICT_FMAXNUM: case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::FMINIMUMNUM: ExpandFloatRes_FMINIMUMNUM(N, Lo, Hi); break; + case ISD::FMAXIMUMNUM: ExpandFloatRes_FMAXIMUMNUM(N, Lo, Hi); break; case ISD::STRICT_FADD: case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::STRICT_FACOS: @@ -1558,6 +1576,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, RTLIB::FMAX_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Binary( + N, + GetFPLibCall(N->getValueType(0), RTLIB::FMINIMUMNUM_F32, + RTLIB::FMINIMUMNUM_F64, RTLIB::FMINIMUMNUM_F80, + RTLIB::FMINIMUMNUM_F128, RTLIB::FMINIMUMNUM_PPCF128), + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Binary( + N, + GetFPLibCall(N->getValueType(0), RTLIB::FMAXIMUMNUM_F32, + RTLIB::FMAXIMUMNUM_F64, RTLIB::FMAXIMUMNUM_F80, + RTLIB::FMAXIMUMNUM_F128, RTLIB::FMAXIMUMNUM_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -2621,6 +2659,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMAXIMUM: case ISD::FMINIMUM: + case ISD::FMAXIMUMNUM: + case ISD::FMINIMUMNUM: case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMAXNUM_IEEE: @@ -3063,6 +3103,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMAXIMUM: case ISD::FMINIMUM: + case ISD::FMAXIMUMNUM: + 
case ISD::FMINIMUMNUM: case ISD::FMAXNUM: case ISD::FMINNUM: case ISD::FMUL: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 3a49a8ff10860a..6de1e3eca7feda 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -567,6 +567,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FATAN(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); + SDValue SoftenFloatRes_FMINIMUMNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXIMUMNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCBRT(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); @@ -659,6 +661,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandFloatRes_FATAN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXIMUMNUM(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCBRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index ab12c3b0e728a8..7bf90ceb93cb4e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5465,7 +5465,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const return false; } case ISD::FMINNUM: - case ISD::FMAXNUM: { + case ISD::FMAXNUM: + case ISD::FMINIMUMNUM: + case ISD::FMAXIMUMNUM: { // Only one needs to be known not-nan, since it will be returned if the // other ends up being one. 
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) || @@ -6804,6 +6806,10 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, return getConstantFP(minimum(C1, C2), DL, VT); case ISD::FMAXIMUM: return getConstantFP(maximum(C1, C2), DL, VT); + case ISD::FMINIMUMNUM: + return getConstantFP(minimumnum(C1, C2), DL, VT); + case ISD::FMAXIMUMNUM: + return getConstantFP(maximumnum(C1, C2), DL, VT); default: break; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 37ba62911ec70b..7cdd3d47b641d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6882,6 +6882,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::minimumnum: + setValue(&I, DAG.getNode(ISD::FMINIMUMNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), Flags)); + return; + case Intrinsic::maximumnum: + setValue(&I, DAG.getNode(ISD::FMAXIMUMNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), Flags)); + return; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -9257,6 +9269,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitBinaryFloatCall(I, ISD::FMAXNUM)) return; break; + case LibFunc_fminimum_num: + case LibFunc_fminimum_numf: + case LibFunc_fminimum_numl: + if (visitBinaryFloatCall(I, ISD::FMINIMUMNUM)) + return; + break; + case LibFunc_fmaximum_num: + case LibFunc_fmaximum_numf: + case LibFunc_fmaximum_numl: + if (visitBinaryFloatCall(I, ISD::FMAXIMUMNUM)) + return; + break; case LibFunc_sin: case LibFunc_sinf: case LibFunc_sinl: diff --git 
a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 46e8e54ee4ed7d..001f782f209fdb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -203,6 +203,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FMINIMUM: return "strict_fminimum"; case ISD::FMAXIMUM: return "fmaximum"; case ISD::STRICT_FMAXIMUM: return "strict_fmaximum"; + case ISD::FMINIMUMNUM: return "fminimumnum"; + case ISD::FMAXIMUMNUM: return "fmaximumnum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::STRICT_FSQRT: return "strict_fsqrt"; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4cf1e655b00990..2c939967a5e1d9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -8558,6 +8558,94 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N, return MinMax; } +SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + unsigned Opc = Node->getOpcode(); + EVT VT = Node->getValueType(0); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + bool IsMax = Opc == ISD::FMAXIMUMNUM; + const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags Flags = Node->getFlags(); + + unsigned NewOp = + Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; + + if (isOperationLegalOrCustom(NewOp, VT)) { + if (!Flags.hasNoNaNs()) { + // Insert canonicalizes if it's possible we need to quiet to get correct + // sNaN behavior. 
+ if (!DAG.isKnownNeverSNaN(LHS)) { + LHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, LHS, Flags); + } + if (!DAG.isKnownNeverSNaN(RHS)) { + RHS = DAG.getNode(ISD::FCANONICALIZE, DL, VT, RHS, Flags); + } + } + + return DAG.getNode(NewOp, DL, VT, LHS, RHS, Flags); + } + + // We can use FMINIMUM/FMAXIMUM if there is no NaN, since it has + // same behaviors for all of other cases: +0.0 vs -0.0 included. + if (Flags.hasNoNaNs() || + (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS))) { + unsigned IEEE2019Op = + Opc == ISD::FMINIMUMNUM ? ISD::FMINIMUM : ISD::FMAXIMUM; + if (isOperationLegalOrCustom(IEEE2019Op, VT)) + return DAG.getNode(IEEE2019Op, DL, VT, LHS, RHS, Flags); + } + + // FMINNUM/FMAXNUM returns qNaN if either operand is sNaN, and it may return + // either one for +0.0 vs -0.0. + if ((Flags.hasNoNaNs() || + (DAG.isKnownNeverSNaN(LHS) && DAG.isKnownNeverSNaN(RHS))) && + (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(LHS) || + DAG.isKnownNeverZeroFloat(RHS))) { + unsigned IEEE2008Op = Opc == ISD::FMINIMUMNUM ? ISD::FMINNUM : ISD::FMAXNUM; + if (isOperationLegalOrCustom(IEEE2008Op, VT)) + return DAG.getNode(IEEE2008Op, DL, VT, LHS, RHS, Flags); + } + + // If only one operand is NaN, override it with the other operand. + if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS)) { + LHS = DAG.getSelectCC(DL, LHS, LHS, RHS, LHS, ISD::SETUO); + } + if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(RHS)) { + RHS = DAG.getSelectCC(DL, RHS, RHS, LHS, RHS, ISD::SETUO); + } + + SDValue MinMax = + DAG.getSelectCC(DL, LHS, RHS, LHS, RHS, IsMax ? ISD::SETGT : ISD::SETLT); + // If MinMax is NaN, let's quiet it. + if (!Flags.hasNoNaNs() && !DAG.isKnownNeverNaN(LHS) && + !DAG.isKnownNeverNaN(RHS)) { + SDValue MinMaxQuiet = + DAG.getNode(ISD::FCANONICALIZE, DL, VT, MinMax, Flags); + MinMax = + DAG.getSelectCC(DL, MinMax, MinMax, MinMaxQuiet, MinMax, ISD::SETUO); + } + + // Fixup signed zero behavior. 
+ if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros() || + DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) { + return MinMax; + } + SDValue TestZero = + DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32); + SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax, + DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ); + SDValue LCmp = DAG.getSelect( + DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS, + MinMax, Flags); + SDValue RCmp = DAG.getSelect( + DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp, + Flags); + return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags); +} + /// Returns a true value if if this FPClassTest can be performed with an ordered /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns /// std::nullopt if it cannot be performed as a compare with 0. diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 149b5dabee0565..4ff8617f740c89 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -713,6 +713,7 @@ void TargetLoweringBase::initActions() { ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, ISD::FMINIMUM, ISD::FMAXIMUM, + ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMAD, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS, diff --git a/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll new file mode 100644 index 00000000000000..b4fdd954b856c8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/fp-maximumnum-minimumnum.ll @@ -0,0 +1,431 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA32F +; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32D +; RUN: llc --mtriple=loongarch64 --mattr=+f,-d < %s | FileCheck %s --check-prefix=LA64F +; RUN: llc 
--mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64D + +declare float @llvm.maximumnum.f32(float, float) +declare double @llvm.maximumnum.f64(double, double) +declare float @llvm.minimumnum.f32(float, float) +declare double @llvm.minimumnum.f64(double, double) + +define float @maximumnum_float(float %x, float %y) { +; +; LA32F-LABEL: maximumnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nsz(float %x, float %y) { +; +; LA32F-LABEL: maximumnum_float_nsz: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_float_nsz: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_float_nsz: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_float_nsz: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa0, 
$fa0, $fa1 +; LA64D-NEXT: ret + %z = call nsz float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nnan(float %x, float %y) { +; +; LA32F-LABEL: maximumnum_float_nnan: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_float_nnan: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_float_nnan: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_float_nnan: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nnan float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + + +define double @maximumnum_double(double %x, double %y) { +; +; LA32F-LABEL: maximumnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmaximum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmaximum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_double: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define 
double @maximumnum_double_nsz(double %x, double %y) { +; +; LA32F-LABEL: maximumnum_double_nsz: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmaximum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_double_nsz: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_double_nsz: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmaximum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_double_nsz: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nsz double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define double @maximumnum_double_nnan(double %x, double %y) { +; +; LA32F-LABEL: maximumnum_double_nnan: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fmaximum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: maximumnum_double_nnan: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: maximumnum_double_nnan: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 
8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fmaximum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: maximumnum_double_nnan: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nnan double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define float @minimumnum_float(float %x, float %y) { +; +; LA32F-LABEL: minimumnum_float: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_float: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_float: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_float: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nsz(float %x, float %y) { +; +; LA32F-LABEL: minimumnum_float_nsz: +; LA32F: # %bb.0: +; LA32F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_float_nsz: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_float_nsz: +; LA64F: # %bb.0: +; LA64F-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64F-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_float_nsz: +; 
LA64D: # %bb.0: +; LA64D-NEXT: fmax.s $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.s $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nsz float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nnan(float %x, float %y) { +; +; LA32F-LABEL: minimumnum_float_nnan: +; LA32F: # %bb.0: +; LA32F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_float_nnan: +; LA32D: # %bb.0: +; LA32D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_float_nnan: +; LA64F: # %bb.0: +; LA64F-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_float_nnan: +; LA64D: # %bb.0: +; LA64D-NEXT: fmin.s $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nnan float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define double @minimumnum_double(double %x, double %y) { +; +; LA32F-LABEL: minimumnum_double: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fminimum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_double: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_double: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fminimum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_double: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, 
$fa0, $fa1 +; LA64D-NEXT: ret + %z = call double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nsz(double %x, double %y) { +; +; LA32F-LABEL: minimumnum_double_nsz: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fminimum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_double_nsz: +; LA32D: # %bb.0: +; LA32D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA32D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA32D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_double_nsz: +; LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fminimum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_double_nsz: +; LA64D: # %bb.0: +; LA64D-NEXT: fmax.d $fa1, $fa1, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nsz double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nnan(double %x, double %y) { +; +; LA32F-LABEL: minimumnum_double_nnan: +; LA32F: # %bb.0: +; LA32F-NEXT: addi.w $sp, $sp, -16 +; LA32F-NEXT: .cfi_def_cfa_offset 16 +; LA32F-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32F-NEXT: .cfi_offset 1, -4 +; LA32F-NEXT: bl %plt(fminimum_num) +; LA32F-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32F-NEXT: addi.w $sp, $sp, 16 +; LA32F-NEXT: ret +; +; LA32D-LABEL: minimumnum_double_nnan: +; LA32D: # %bb.0: +; LA32D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA32D-NEXT: ret +; +; LA64F-LABEL: minimumnum_double_nnan: +; 
LA64F: # %bb.0: +; LA64F-NEXT: addi.d $sp, $sp, -16 +; LA64F-NEXT: .cfi_def_cfa_offset 16 +; LA64F-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64F-NEXT: .cfi_offset 1, -8 +; LA64F-NEXT: bl %plt(fminimum_num) +; LA64F-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 16 +; LA64F-NEXT: ret +; +; LA64D-LABEL: minimumnum_double_nnan: +; LA64D: # %bb.0: +; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: ret + %z = call nnan double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} diff --git a/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll new file mode 100644 index 00000000000000..bc81966ca0f5c9 --- /dev/null +++ b/llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=mipsisa32r6 < %s | FileCheck %s --check-prefix=MIPS32R6 + +declare float @llvm.maximumnum.f32(float, float) +declare double @llvm.maximumnum.f64(double, double) +declare float @llvm.minimumnum.f32(float, float) +declare double @llvm.minimumnum.f64(double, double) + +define float @maximumnum_float(float %x, float %y) { +; MIPS32R6-LABEL: maximumnum_float: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.s $f0, $f14, $f14 +; MIPS32R6-NEXT: min.s $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: max.s $f0, $f1, $f0 + %z = call float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nsz(float %x, float %y) { +; MIPS32R6-LABEL: maximumnum_float_nsz: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.s $f0, $f14, $f14 +; MIPS32R6-NEXT: min.s $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: max.s $f0, $f1, $f0 + %z = call nsz float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + +define float @maximumnum_float_nnan(float %x, float %y) { +; MIPS32R6-LABEL: maximumnum_float_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; 
MIPS32R6-NEXT: max.s $f0, $f12, $f14 + %z = call nnan float @llvm.maximumnum.f32(float %x, float %y) + ret float %z +} + + +define double @maximumnum_double(double %x, double %y) { +; MIPS32R6-LABEL: maximumnum_double: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.d $f0, $f14, $f14 +; MIPS32R6-NEXT: min.d $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: max.d $f0, $f1, $f0 + %z = call double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define double @maximumnum_double_nsz(double %x, double %y) { +; MIPS32R6-LABEL: maximumnum_double_nsz: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.d $f0, $f14, $f14 +; MIPS32R6-NEXT: min.d $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: max.d $f0, $f1, $f0 + %z = call nsz double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define double @maximumnum_double_nnan(double %x, double %y) { +; MIPS32R6-LABEL: maximumnum_double_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: max.d $f0, $f12, $f14 + %z = call nnan double @llvm.maximumnum.f64(double %x, double %y) + ret double %z +} + +define float @minimumnum_float(float %x, float %y) { +; MIPS32R6-LABEL: minimumnum_float: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.s $f0, $f14, $f14 +; MIPS32R6-NEXT: min.s $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.s $f0, $f1, $f0 + %z = call float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nsz(float %x, float %y) { +; MIPS32R6-LABEL: minimumnum_float_nsz: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.s $f0, $f14, $f14 +; MIPS32R6-NEXT: min.s $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.s $f0, $f1, $f0 + %z = call nsz float @llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define float @minimumnum_float_nnan(float %x, float %y) { +; MIPS32R6-LABEL: minimumnum_float_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.s $f0, $f12, $f14 + %z = call nnan float 
@llvm.minimumnum.f32(float %x, float %y) + ret float %z +} + +define double @minimumnum_double(double %x, double %y) { +; MIPS32R6-LABEL: minimumnum_double: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.d $f0, $f14, $f14 +; MIPS32R6-NEXT: min.d $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.d $f0, $f1, $f0 + %z = call double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nsz(double %x, double %y) { +; MIPS32R6-LABEL: minimumnum_double_nsz: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: min.d $f0, $f14, $f14 +; MIPS32R6-NEXT: min.d $f1, $f12, $f12 +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.d $f0, $f1, $f0 + %z = call nsz double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} + +define double @minimumnum_double_nnan(double %x, double %y) { +; MIPS32R6-LABEL: minimumnum_double_nnan: +; MIPS32R6: # %bb.0: +; MIPS32R6-NEXT: jr $ra +; MIPS32R6-NEXT: min.d $f0, $f12, $f14 + %z = call nnan double @llvm.minimumnum.f64(double %x, double %y) + ret double %z +} diff --git a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml index 9b37b49b3d49d2..cff5019f8e6ee4 100644 --- a/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml +++ b/llvm/test/tools/llvm-tli-checker/ps4-tli-check.yaml @@ -32,15 +32,21 @@ # RUN: FileCheck %s --check-prefix=AVAIL --input-file %t3.txt # RUN: FileCheck %s --check-prefix=UNAVAIL --input-file %t3.txt # -# CHECK: << Total TLI yes SDK no: 12 +# CHECK: << Total TLI yes SDK no: 18 # CHECK: >> Total TLI no SDK yes: 0 # CHECK: == Total TLI yes SDK yes: 248 # # WRONG_DETAIL: << TLI yes SDK no : '_ZdaPv' aka operator delete[](void*) # WRONG_DETAIL: >> TLI no SDK yes: '_ZdaPvj' aka operator delete[](void*, unsigned int) -# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : '_Zn{{.*}}__hot_cold_t +# WRONG_DETAIL-COUNT-8: << TLI yes SDK no : {{.*}}__hot_cold_t # WRONG_DETAIL-COUNT-4: << TLI yes SDK no : '__size_returning_new{{.*}} -# 
WRONG_SUMMARY: << Total TLI yes SDK no: 13{{$}} +# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_num' +# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numf' +# WRONG_DETAIL: << TLI yes SDK no : 'fmaximum_numl' +# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_num' +# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numf' +# WRONG_DETAIL: << TLI yes SDK no : 'fminimum_numl' +# WRONG_SUMMARY: << Total TLI yes SDK no: 19{{$}} # WRONG_SUMMARY: >> Total TLI no SDK yes: 1{{$}} # WRONG_SUMMARY: == Total TLI yes SDK yes: 247 # @@ -48,8 +54,8 @@ ## the exact count first; the two directives should add up to that. ## Yes, this means additions to TLI will fail this test, but the argument ## to -COUNT can't be an expression. -# AVAIL: TLI knows 493 symbols, 260 available -# AVAIL-COUNT-260: {{^}} available +# AVAIL: TLI knows 499 symbols, 266 available +# AVAIL-COUNT-266: {{^}} available # AVAIL-NOT: {{^}} available # UNAVAIL-COUNT-233: not available # UNAVAIL-NOT: not available diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 68bf8e670771ee..ff7dec5bee31df 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -212,6 +212,12 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare double @fmin(double, double)\n" "declare float @fminf(float, float)\n" "declare x86_fp80 @fminl(x86_fp80, x86_fp80)\n" + "declare double @fmaximum_num(double, double)\n" + "declare float @fmaximum_numf(float, float)\n" + "declare x86_fp80 @fmaximum_numl(x86_fp80, x86_fp80)\n" + "declare double @fminimum_num(double, double)\n" + "declare float @fminimum_numf(float, float)\n" + "declare x86_fp80 @fminimum_numl(x86_fp80, x86_fp80)\n" "declare double @fmod(double, double)\n" "declare float @fmodf(float, float)\n" "declare x86_fp80 @fmodl(x86_fp80, x86_fp80)\n" From 8d037107287b85dcc8a0f0af75bd8ae5c07facb5 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 15 Aug 2024 08:17:22 
+0200 Subject: [PATCH 28/47] [ctx_prof] Remove an unneeded include in CtxProfAnalysis.cpp --- llvm/lib/Analysis/CtxProfAnalysis.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp index 5bf336dd311158..7b4666b29a1936 100644 --- a/llvm/lib/Analysis/CtxProfAnalysis.cpp +++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h" #define DEBUG_TYPE "ctx_prof" From 372842b30f8e611765e3cb9f06b8265d2e79f3f6 Mon Sep 17 00:00:00 2001 From: Freddy Ye Date: Thu, 15 Aug 2024 14:18:59 +0800 Subject: [PATCH 29/47] [X86][MC] Remove CMPCCXADD's CondCode flavor. (#103898) To align with gas's latest changes. relate gas patch: https://sourceware.org/pipermail/binutils/2024-May/134360.html --- clang/test/CodeGen/X86/cmpccxadd-builtins.c | 48 +-- .../X86/MCTargetDesc/X86InstPrinterCommon.cpp | 13 +- llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll | 120 ++++---- .../MC/Disassembler/X86/apx/cmpccxadd.txt | 80 ++--- .../test/MC/Disassembler/X86/cmpccxadd-64.txt | 288 +++++++++--------- llvm/test/MC/X86/apx/cmpccxadd-att.s | 80 ++--- llvm/test/MC/X86/apx/cmpccxadd-intel.s | 80 ++--- llvm/test/MC/X86/cmpccxadd-att-alias.s | 26 +- llvm/test/MC/X86/cmpccxadd-att.s | 288 +++++++++--------- llvm/test/MC/X86/cmpccxadd-intel-alias.s | 26 +- llvm/test/MC/X86/cmpccxadd-intel.s | 288 +++++++++--------- 11 files changed, 668 insertions(+), 669 deletions(-) diff --git a/clang/test/CodeGen/X86/cmpccxadd-builtins.c b/clang/test/CodeGen/X86/cmpccxadd-builtins.c index 6daed3a1b17b67..f058dc9b2baa46 100644 --- a/clang/test/CodeGen/X86/cmpccxadd-builtins.c +++ b/clang/test/CodeGen/X86/cmpccxadd-builtins.c @@ -52,50 +52,50 @@ long long test_cmplxadd64(void *__A, long long __B, long long __C) { return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NB); } -int 
test_cmpnbexadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpnbexadd32( +int test_cmpaxadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpaxadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 4) return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_Z); } -long long test_cmpnbexadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpnbexadd64( +long long test_cmpaxadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpaxadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 4) return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_Z); } -int test_cmpnbxadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpnbxadd32( +int test_cmpaexadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpaexadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 5) return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NZ); } -long long test_cmpnbxadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpnbxadd64( +long long test_cmpaexadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpaexadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 5) return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NZ); } -int test_cmpnlexadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpnlexadd32( +int test_cmpgxadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpgxadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 6) return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_BE); } -long long test_cmpnlexadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpnlexadd64( +long long test_cmpgxadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpgxadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 6) return 
_cmpccxadd_epi64(__A, __B, __C, _CMPCCX_BE); } -int test_cmpnlxadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpnlxadd32( +int test_cmpgexadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpgexadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 7) return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NBE); } -long long test_cmpnlxadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpnlxadd64( +long long test_cmpgexadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpgexadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 7) return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NBE); } @@ -136,14 +136,14 @@ long long test_cmpnsxadd64(void *__A, long long __B, long long __C) { return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_P); } -int test_cmpnzxadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpnzxadd32( +int test_cmpnexadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpnexadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 11) return _cmpccxadd_epi32(__A, __B, __C, _CMPCCX_NP); } -long long test_cmpnzxadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpnzxadd64( +long long test_cmpnexadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpnexadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 11) return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NP); } @@ -184,14 +184,14 @@ long long test_cmpsxadd64(void *__A, long long __B, long long __C) { return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_LE); } -int test_cmpzxadd32(void *__A, int __B, int __C) { - // CHECK-LABEL: @test_cmpzxadd32( +int test_cmpexadd32(void *__A, int __B, int __C) { + // CHECK-LABEL: @test_cmpexadd32( // CHECK: call i32 @llvm.x86.cmpccxadd32(ptr %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 15) return _cmpccxadd_epi32(__A, __B, __C, 
_CMPCCX_NLE); } -long long test_cmpzxadd64(void *__A, long long __B, long long __C) { - // CHECK-LABEL: @test_cmpzxadd64( +long long test_cmpexadd64(void *__A, long long __B, long long __C) { + // CHECK-LABEL: @test_cmpexadd64( // CHECK: call i64 @llvm.x86.cmpccxadd64(ptr %{{.*}}, i64 %{{.*}}, i64 %{{.*}}, i32 15) return _cmpccxadd_epi64(__A, __B, __C, _CMPCCX_NLE); } diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp index 21c1556d1d8ed2..ad1f2dc532d1c2 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp @@ -30,7 +30,6 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op, raw_ostream &O) { int64_t Imm = MI->getOperand(Op).getImm(); unsigned Opc = MI->getOpcode(); - bool IsCMPCCXADD = X86::isCMPCCXADD(Opc); bool IsCCMPOrCTEST = X86::isCCMPCC(Opc) || X86::isCTESTCC(Opc); // clang-format off @@ -39,19 +38,19 @@ void X86InstPrinterCommon::printCondCode(const MCInst *MI, unsigned Op, case 0: O << "o"; break; case 1: O << "no"; break; case 2: O << "b"; break; - case 3: O << (IsCMPCCXADD ? "nb" : "ae"); break; - case 4: O << (IsCMPCCXADD ? "z" : "e"); break; - case 5: O << (IsCMPCCXADD ? "nz" : "ne"); break; + case 3: O << "ae"; break; + case 4: O << "e"; break; + case 5: O << "ne"; break; case 6: O << "be"; break; - case 7: O << (IsCMPCCXADD ? "nbe" : "a"); break; + case 7: O << "a"; break; case 8: O << "s"; break; case 9: O << "ns"; break; case 0xa: O << (IsCCMPOrCTEST ? "t" : "p"); break; case 0xb: O << (IsCCMPOrCTEST ? "f" : "np"); break; case 0xc: O << "l"; break; - case 0xd: O << (IsCMPCCXADD ? "nl" : "ge"); break; + case 0xd: O << "ge"; break; case 0xe: O << "le"; break; - case 0xf: O << (IsCMPCCXADD ? 
"nle" : "g"); break; + case 0xf: O << "g"; break; } // clang-format on } diff --git a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll index f88216f95a7614..561289c1b77465 100644 --- a/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll +++ b/llvm/test/CodeGen/X86/cmpccxadd-intrinsics.ll @@ -112,13 +112,13 @@ define dso_local i32 @test_cmplxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { ; CHECK-LABEL: test_cmplxadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpnbxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07] +; CHECK-NEXT: cmpaexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe3,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; EGPR-LABEL: test_cmplxadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpnbxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07] +; EGPR-NEXT: cmpaexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe3,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 3) @@ -129,95 +129,95 @@ define dso_local i64 @test_cmplxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { ; CHECK-LABEL: test_cmplxadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpnbxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07] +; CHECK-NEXT: cmpaexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe3,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; EGPR-LABEL: test_cmplxadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpnbxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07] +; EGPR-NEXT: cmpaexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe3,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail 
call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 3) ret i64 %0 } -define dso_local i32 @test_cmpnbexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpnbexadd32: +define dso_local i32 @test_cmpaxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpaxadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07] +; CHECK-NEXT: cmpexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe4,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnbexadd32: +; EGPR-LABEL: test_cmpaxadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07] +; EGPR-NEXT: cmpexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe4,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 4) ret i32 %0 } -define dso_local i64 @test_cmpnbexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpnbexadd64: +define dso_local i64 @test_cmpaxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpaxadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07] +; CHECK-NEXT: cmpexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe4,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnbexadd64: +; EGPR-LABEL: test_cmpaxadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07] +; EGPR-NEXT: cmpexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe4,0x07] ; EGPR-NEXT: retq # encoding: 
[0xc3] entry: %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 4) ret i64 %0 } -define dso_local i32 @test_cmpnbxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpnbxadd32: +define dso_local i32 @test_cmpaexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpaexadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpnzxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07] +; CHECK-NEXT: cmpnexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe5,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnbxadd32: +; EGPR-LABEL: test_cmpaexadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpnzxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07] +; EGPR-NEXT: cmpnexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe5,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 5) ret i32 %0 } -define dso_local i64 @test_cmpnbxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpnbxadd64: +define dso_local i64 @test_cmpaexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpaexadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpnzxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07] +; CHECK-NEXT: cmpnexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe5,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnbxadd64: +; EGPR-LABEL: test_cmpaexadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpnzxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07] +; EGPR-NEXT: cmpnexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe5,0x07] ; 
EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 5) ret i64 %0 } -define dso_local i32 @test_cmpnlexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpnlexadd32: +define dso_local i32 @test_cmpgxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpgxadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] ; CHECK-NEXT: cmpbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe6,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnlexadd32: +; EGPR-LABEL: test_cmpgxadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] ; EGPR-NEXT: cmpbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe6,0x07] @@ -227,14 +227,14 @@ entry: ret i32 %0 } -define dso_local i64 @test_cmpnlexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpnlexadd64: +define dso_local i64 @test_cmpgxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpgxadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] ; CHECK-NEXT: cmpbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe6,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnlexadd64: +; EGPR-LABEL: test_cmpgxadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] ; EGPR-NEXT: cmpbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe6,0x07] @@ -244,34 +244,34 @@ entry: ret i64 %0 } -define dso_local i32 @test_cmpnlxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpnlxadd32: +define dso_local i32 @test_cmpgexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpgexadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpnbexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07] +; 
CHECK-NEXT: cmpaxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xe7,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnlxadd32: +; EGPR-LABEL: test_cmpgexadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpnbexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07] +; EGPR-NEXT: cmpaxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xe7,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 7) ret i32 %0 } -define dso_local i64 @test_cmpnlxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpnlxadd64: +define dso_local i64 @test_cmpgexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpgexadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpnbexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07] +; CHECK-NEXT: cmpaxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xe7,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnlxadd64: +; EGPR-LABEL: test_cmpgexadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpnbexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07] +; EGPR-NEXT: cmpaxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xe7,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 7) @@ -380,14 +380,14 @@ entry: ret i64 %0 } -define dso_local i32 @test_cmpnzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpnzxadd32: +define dso_local i32 @test_cmpnexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpnexadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] ; CHECK-NEXT: cmpnpxadd %edx, 
%eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xeb,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnzxadd32: +; EGPR-LABEL: test_cmpnexadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] ; EGPR-NEXT: cmpnpxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xeb,0x07] @@ -397,14 +397,14 @@ entry: ret i32 %0 } -define dso_local i64 @test_cmpnzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpnzxadd64: +define dso_local i64 @test_cmpnexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpnexadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] ; CHECK-NEXT: cmpnpxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xeb,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpnzxadd64: +; EGPR-LABEL: test_cmpnexadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] ; EGPR-NEXT: cmpnpxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xeb,0x07] @@ -452,13 +452,13 @@ define dso_local i32 @test_cmppxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { ; CHECK-LABEL: test_cmppxadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpnlxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07] +; CHECK-NEXT: cmpgexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xed,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; EGPR-LABEL: test_cmppxadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpnlxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07] +; EGPR-NEXT: cmpgexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xed,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 13) @@ -469,13 +469,13 @@ define dso_local i64 
@test_cmppxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { ; CHECK-LABEL: test_cmppxadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpnlxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07] +; CHECK-NEXT: cmpgexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xed,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; ; EGPR-LABEL: test_cmppxadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpnlxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07] +; EGPR-NEXT: cmpgexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xed,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 13) @@ -516,34 +516,34 @@ entry: ret i64 %0 } -define dso_local i32 @test_cmpzxadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { -; CHECK-LABEL: test_cmpzxadd32: +define dso_local i32 @test_cmpexadd32(ptr %__A, i32 %__B, i32 %__C) nounwind { +; CHECK-LABEL: test_cmpexadd32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; CHECK-NEXT: cmpnlexadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07] +; CHECK-NEXT: cmpgxadd %edx, %eax, (%rdi) # encoding: [0xc4,0xe2,0x69,0xef,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpzxadd32: +; EGPR-LABEL: test_cmpexadd32: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movl %esi, %eax # encoding: [0x89,0xf0] -; EGPR-NEXT: cmpnlexadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07] +; EGPR-NEXT: cmpgxadd %edx, %eax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0xef,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i32 @llvm.x86.cmpccxadd32(ptr %__A, i32 %__B, i32 %__C, i32 15) ret i32 %0 } -define dso_local i64 @test_cmpzxadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { -; CHECK-LABEL: test_cmpzxadd64: +define 
dso_local i64 @test_cmpexadd64(ptr %__A, i64 %__B, i64 %__C) nounwind { +; CHECK-LABEL: test_cmpexadd64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; CHECK-NEXT: cmpnlexadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07] +; CHECK-NEXT: cmpgxadd %rdx, %rax, (%rdi) # encoding: [0xc4,0xe2,0xe9,0xef,0x07] ; CHECK-NEXT: retq # encoding: [0xc3] ; -; EGPR-LABEL: test_cmpzxadd64: +; EGPR-LABEL: test_cmpexadd64: ; EGPR: # %bb.0: # %entry ; EGPR-NEXT: movq %rsi, %rax # encoding: [0x48,0x89,0xf0] -; EGPR-NEXT: cmpnlexadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07] +; EGPR-NEXT: cmpgxadd %rdx, %rax, (%rdi) # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xe9,0xef,0x07] ; EGPR-NEXT: retq # encoding: [0xc3] entry: %0 = tail call i64 @llvm.x86.cmpccxadd64(ptr %__A, i64 %__B, i64 %__C, i32 15) diff --git a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt index 2a54bebd5212c9..7a2e09af5b3db3 100644 --- a/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt +++ b/llvm/test/MC/Disassembler/X86/apx/cmpccxadd.txt @@ -1,20 +1,20 @@ # RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT # RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL -# ATT: cmpnbexadd %ecx, %edx, 123(%rax,%rbx,4) -# INTEL: cmpnbexadd dword ptr [rax + 4*rbx + 123], edx, ecx +# ATT: cmpaxadd %ecx, %edx, 123(%rax,%rbx,4) +# INTEL: cmpaxadd dword ptr [rax + 4*rbx + 123], edx, ecx 0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b -# ATT: cmpnbexadd %r9, %r15, 123(%rax,%rbx,4) -# INTEL: cmpnbexadd qword ptr [rax + 4*rbx + 123], r15, r9 +# ATT: cmpaxadd %r9, %r15, 123(%rax,%rbx,4) +# INTEL: cmpaxadd qword ptr [rax + 4*rbx + 123], r15, r9 0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b -# ATT: cmpnbexadd %r18d, %r22d, 291(%r28,%r29,4) -# INTEL: cmpnbexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# ATT: 
cmpaxadd %r18d, %r22d, 291(%r28,%r29,4) +# INTEL: cmpaxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d 0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnbexadd %r19, %r23, 291(%r28,%r29,4) -# INTEL: cmpnbexadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# ATT: cmpaxadd %r19, %r23, 291(%r28,%r29,4) +# INTEL: cmpaxadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00 # ATT: cmpbexadd %ecx, %edx, 123(%rax,%rbx,4) @@ -49,52 +49,52 @@ # INTEL: cmpbxadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpzxadd %ecx, %edx, 123(%rax,%rbx,4) -# INTEL: cmpzxadd dword ptr [rax + 4*rbx + 123], edx, ecx +# ATT: cmpexadd %ecx, %edx, 123(%rax,%rbx,4) +# INTEL: cmpexadd dword ptr [rax + 4*rbx + 123], edx, ecx 0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b -# ATT: cmpzxadd %r9, %r15, 123(%rax,%rbx,4) -# INTEL: cmpzxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# ATT: cmpexadd %r9, %r15, 123(%rax,%rbx,4) +# INTEL: cmpexadd qword ptr [rax + 4*rbx + 123], r15, r9 0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b -# ATT: cmpzxadd %r18d, %r22d, 291(%r28,%r29,4) -# INTEL: cmpzxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# ATT: cmpexadd %r18d, %r22d, 291(%r28,%r29,4) +# INTEL: cmpexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d 0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpzxadd %r19, %r23, 291(%r28,%r29,4) -# INTEL: cmpzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# ATT: cmpexadd %r19, %r23, 291(%r28,%r29,4) +# INTEL: cmpexadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnlxadd %ecx, %edx, 123(%rax,%rbx,4) -# INTEL: cmpnlxadd dword ptr [rax + 4*rbx + 123], edx, ecx +# ATT: cmpgexadd %ecx, %edx, 123(%rax,%rbx,4) +# INTEL: cmpgexadd dword ptr [rax + 4*rbx + 123], edx, ecx 0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b -# ATT: cmpnlxadd %r9, %r15, 123(%rax,%rbx,4) -# INTEL: cmpnlxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# 
ATT: cmpgexadd %r9, %r15, 123(%rax,%rbx,4) +# INTEL: cmpgexadd qword ptr [rax + 4*rbx + 123], r15, r9 0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b -# ATT: cmpnlxadd %r18d, %r22d, 291(%r28,%r29,4) -# INTEL: cmpnlxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# ATT: cmpgexadd %r18d, %r22d, 291(%r28,%r29,4) +# INTEL: cmpgexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d 0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnlxadd %r19, %r23, 291(%r28,%r29,4) -# INTEL: cmpnlxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# ATT: cmpgexadd %r19, %r23, 291(%r28,%r29,4) +# INTEL: cmpgexadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnlexadd %ecx, %edx, 123(%rax,%rbx,4) -# INTEL: cmpnlexadd dword ptr [rax + 4*rbx + 123], edx, ecx +# ATT: cmpgxadd %ecx, %edx, 123(%rax,%rbx,4) +# INTEL: cmpgxadd dword ptr [rax + 4*rbx + 123], edx, ecx 0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b -# ATT: cmpnlexadd %r9, %r15, 123(%rax,%rbx,4) -# INTEL: cmpnlexadd qword ptr [rax + 4*rbx + 123], r15, r9 +# ATT: cmpgxadd %r9, %r15, 123(%rax,%rbx,4) +# INTEL: cmpgxadd qword ptr [rax + 4*rbx + 123], r15, r9 0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b -# ATT: cmpnlexadd %r18d, %r22d, 291(%r28,%r29,4) -# INTEL: cmpnlexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# ATT: cmpgxadd %r18d, %r22d, 291(%r28,%r29,4) +# INTEL: cmpgxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d 0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnlexadd %r19, %r23, 291(%r28,%r29,4) -# INTEL: cmpnlexadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# ATT: cmpgxadd %r19, %r23, 291(%r28,%r29,4) +# INTEL: cmpgxadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00 # ATT: cmplexadd %ecx, %edx, 123(%rax,%rbx,4) @@ -129,20 +129,20 @@ # INTEL: cmplxadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnzxadd %ecx, %edx, 123(%rax,%rbx,4) -# INTEL: cmpnzxadd dword ptr 
[rax + 4*rbx + 123], edx, ecx +# ATT: cmpnexadd %ecx, %edx, 123(%rax,%rbx,4) +# INTEL: cmpnexadd dword ptr [rax + 4*rbx + 123], edx, ecx 0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b -# ATT: cmpnzxadd %r9, %r15, 123(%rax,%rbx,4) -# INTEL: cmpnzxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# ATT: cmpnexadd %r9, %r15, 123(%rax,%rbx,4) +# INTEL: cmpnexadd qword ptr [rax + 4*rbx + 123], r15, r9 0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b -# ATT: cmpnzxadd %r18d, %r22d, 291(%r28,%r29,4) -# INTEL: cmpnzxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# ATT: cmpnexadd %r18d, %r22d, 291(%r28,%r29,4) +# INTEL: cmpnexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d 0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00 -# ATT: cmpnzxadd %r19, %r23, 291(%r28,%r29,4) -# INTEL: cmpnzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# ATT: cmpnexadd %r19, %r23, 291(%r28,%r29,4) +# INTEL: cmpnexadd qword ptr [r28 + 4*r29 + 291], r23, r19 0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00 # ATT: cmpnoxadd %ecx, %edx, 123(%rax,%rbx,4) diff --git a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt index 62420db37f40d7..7b1599de263263 100644 --- a/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt +++ b/llvm/test/MC/Disassembler/X86/cmpccxadd-64.txt @@ -193,196 +193,196 @@ # INTEL: cmplxadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpnbexadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpaxadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnbexadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpaxadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnbexadd %eax, %ecx, (%rip) -# INTEL: 
cmpnbexadd dword ptr [rip], ecx, eax +# ATT: cmpaxadd %eax, %ecx, (%rip) +# INTEL: cmpaxadd dword ptr [rip], ecx, eax 0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnbexadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpnbexadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpaxadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpaxadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpnbexadd %eax, %ecx, 508(%rcx) -# INTEL: cmpnbexadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpaxadd %eax, %ecx, 508(%rcx) +# INTEL: cmpaxadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpnbexadd %eax, %ecx, -512(%rdx) -# INTEL: cmpnbexadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpaxadd %eax, %ecx, -512(%rdx) +# INTEL: cmpaxadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpnbexadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpaxadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnbexadd %r10, %r9, 291(%r8,%rax,4) -# INTEL: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpaxadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnbexadd %r10, %r9, (%rip) -# INTEL: cmpnbexadd qword ptr [rip], r9, r10 +# ATT: cmpaxadd %r10, %r9, (%rip) +# INTEL: cmpaxadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnbexadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpnbexadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpaxadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpaxadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpnbexadd %r10, %r9, 1016(%rcx) -# INTEL: cmpnbexadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpaxadd %r10, %r9, 1016(%rcx) +# 
INTEL: cmpaxadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpnbexadd %r10, %r9, -1024(%rdx) -# INTEL: cmpnbexadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpaxadd %r10, %r9, -1024(%rdx) +# INTEL: cmpaxadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpnbxadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpaexadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnbxadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpaexadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnbxadd %eax, %ecx, (%rip) -# INTEL: cmpnbxadd dword ptr [rip], ecx, eax +# ATT: cmpaexadd %eax, %ecx, (%rip) +# INTEL: cmpaexadd dword ptr [rip], ecx, eax 0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnbxadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpaexadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpaexadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpnbxadd %eax, %ecx, 508(%rcx) -# INTEL: cmpnbxadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpaexadd %eax, %ecx, 508(%rcx) +# INTEL: cmpaexadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpnbxadd %eax, %ecx, -512(%rdx) -# INTEL: cmpnbxadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpaexadd %eax, %ecx, -512(%rdx) +# INTEL: cmpaexadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpnbxadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpaexadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: cmpaexadd qword ptr [rbp + 
8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnbxadd %r10, %r9, 291(%r8,%rax,4) -# INTEL: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpaexadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnbxadd %r10, %r9, (%rip) -# INTEL: cmpnbxadd qword ptr [rip], r9, r10 +# ATT: cmpaexadd %r10, %r9, (%rip) +# INTEL: cmpaexadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnbxadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpnbxadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpaexadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpaexadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpnbxadd %r10, %r9, 1016(%rcx) -# INTEL: cmpnbxadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpaexadd %r10, %r9, 1016(%rcx) +# INTEL: cmpaexadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpnbxadd %r10, %r9, -1024(%rdx) -# INTEL: cmpnbxadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpaexadd %r10, %r9, -1024(%rdx) +# INTEL: cmpaexadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpnlexadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpgxadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnlexadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpgxadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnlexadd %eax, %ecx, (%rip) -# INTEL: cmpnlexadd dword ptr [rip], ecx, eax +# ATT: cmpgxadd %eax, %ecx, (%rip) +# INTEL: cmpgxadd dword ptr [rip], ecx, eax 
0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnlexadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpgxadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpgxadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpnlexadd %eax, %ecx, 508(%rcx) -# INTEL: cmpnlexadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpgxadd %eax, %ecx, 508(%rcx) +# INTEL: cmpgxadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpnlexadd %eax, %ecx, -512(%rdx) -# INTEL: cmpnlexadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpgxadd %eax, %ecx, -512(%rdx) +# INTEL: cmpgxadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpnlexadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpgxadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnlexadd %r10, %r9, 291(%r8,%rax,4) -# INTEL: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpgxadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnlexadd %r10, %r9, (%rip) -# INTEL: cmpnlexadd qword ptr [rip], r9, r10 +# ATT: cmpgxadd %r10, %r9, (%rip) +# INTEL: cmpgxadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnlexadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpnlexadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpgxadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpgxadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpnlexadd %r10, %r9, 1016(%rcx) -# INTEL: cmpnlexadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpgxadd %r10, %r9, 1016(%rcx) +# INTEL: cmpgxadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpnlexadd %r10, %r9, 
-1024(%rdx) -# INTEL: cmpnlexadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpgxadd %r10, %r9, -1024(%rdx) +# INTEL: cmpgxadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpnlxadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpgexadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnlxadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpgexadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnlxadd %eax, %ecx, (%rip) -# INTEL: cmpnlxadd dword ptr [rip], ecx, eax +# ATT: cmpgexadd %eax, %ecx, (%rip) +# INTEL: cmpgexadd dword ptr [rip], ecx, eax 0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnlxadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpgexadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpgexadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpnlxadd %eax, %ecx, 508(%rcx) -# INTEL: cmpnlxadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpgexadd %eax, %ecx, 508(%rcx) +# INTEL: cmpgexadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpnlxadd %eax, %ecx, -512(%rdx) -# INTEL: cmpnlxadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpgexadd %eax, %ecx, -512(%rdx) +# INTEL: cmpgexadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpnlxadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpgexadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnlxadd %r10, %r9, 291(%r8,%rax,4) -# 
INTEL: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpgexadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnlxadd %r10, %r9, (%rip) -# INTEL: cmpnlxadd qword ptr [rip], r9, r10 +# ATT: cmpgexadd %r10, %r9, (%rip) +# INTEL: cmpgexadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnlxadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpnlxadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpgexadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpgexadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpnlxadd %r10, %r9, 1016(%rcx) -# INTEL: cmpnlxadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpgexadd %r10, %r9, 1016(%rcx) +# INTEL: cmpgexadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpnlxadd %r10, %r9, -1024(%rdx) -# INTEL: cmpnlxadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpgexadd %r10, %r9, -1024(%rdx) +# INTEL: cmpgexadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff # ATT: cmpnoxadd %eax, %ecx, 268435456(%rbp,%r14,8) @@ -529,52 +529,52 @@ # INTEL: cmpnsxadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpnzxadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpnexadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnzxadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpnexadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnzxadd %eax, %ecx, (%rip) -# INTEL: cmpnzxadd dword ptr [rip], ecx, eax +# ATT: cmpnexadd %eax, %ecx, (%rip) +# INTEL: cmpnexadd dword ptr [rip], ecx, 
eax 0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnzxadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpnexadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpnexadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpnzxadd %eax, %ecx, 508(%rcx) -# INTEL: cmpnzxadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpnexadd %eax, %ecx, 508(%rcx) +# INTEL: cmpnexadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpnzxadd %eax, %ecx, -512(%rdx) -# INTEL: cmpnzxadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpnexadd %eax, %ecx, -512(%rdx) +# INTEL: cmpnexadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpnzxadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpnexadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpnzxadd %r10, %r9, 291(%r8,%rax,4) -# INTEL: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpnexadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpnzxadd %r10, %r9, (%rip) -# INTEL: cmpnzxadd qword ptr [rip], r9, r10 +# ATT: cmpnexadd %r10, %r9, (%rip) +# INTEL: cmpnexadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpnzxadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpnzxadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpnexadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpnexadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpnzxadd %r10, %r9, 1016(%rcx) -# INTEL: cmpnzxadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpnexadd %r10, %r9, 1016(%rcx) +# INTEL: cmpnexadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpnzxadd %r10, 
%r9, -1024(%rdx) -# INTEL: cmpnzxadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpnexadd %r10, %r9, -1024(%rdx) +# INTEL: cmpnexadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff # ATT: cmpoxadd %eax, %ecx, 268435456(%rbp,%r14,8) @@ -721,52 +721,52 @@ # INTEL: cmpsxadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff -# ATT: cmpzxadd %eax, %ecx, 268435456(%rbp,%r14,8) -# INTEL: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +# ATT: cmpexadd %eax, %ecx, 268435456(%rbp,%r14,8) +# INTEL: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax 0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpzxadd %eax, %ecx, 291(%r8,%rax,4) -# INTEL: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax +# ATT: cmpexadd %eax, %ecx, 291(%r8,%rax,4) +# INTEL: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax 0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpzxadd %eax, %ecx, (%rip) -# INTEL: cmpzxadd dword ptr [rip], ecx, eax +# ATT: cmpexadd %eax, %ecx, (%rip) +# INTEL: cmpexadd dword ptr [rip], ecx, eax 0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpzxadd %eax, %ecx, -128(,%rbp,2) -# INTEL: cmpzxadd dword ptr [2*rbp - 128], ecx, eax +# ATT: cmpexadd %eax, %ecx, -128(,%rbp,2) +# INTEL: cmpexadd dword ptr [2*rbp - 128], ecx, eax 0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff -# ATT: cmpzxadd %eax, %ecx, 508(%rcx) -# INTEL: cmpzxadd dword ptr [rcx + 508], ecx, eax +# ATT: cmpexadd %eax, %ecx, 508(%rcx) +# INTEL: cmpexadd dword ptr [rcx + 508], ecx, eax 0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00 -# ATT: cmpzxadd %eax, %ecx, -512(%rdx) -# INTEL: cmpzxadd dword ptr [rdx - 512], ecx, eax +# ATT: cmpexadd %eax, %ecx, -512(%rdx) +# INTEL: cmpexadd dword ptr [rdx - 512], ecx, eax 0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff -# ATT: cmpzxadd %r10, %r9, 268435456(%rbp,%r14,8) -# INTEL: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +# ATT: cmpexadd %r10, %r9, 268435456(%rbp,%r14,8) +# INTEL: 
cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10 -# ATT: cmpzxadd %r10, %r9, 291(%r8,%rax,4) -# INTEL: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10 +# ATT: cmpexadd %r10, %r9, 291(%r8,%rax,4) +# INTEL: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10 0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00 -# ATT: cmpzxadd %r10, %r9, (%rip) -# INTEL: cmpzxadd qword ptr [rip], r9, r10 +# ATT: cmpexadd %r10, %r9, (%rip) +# INTEL: cmpexadd qword ptr [rip], r9, r10 0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00 -# ATT: cmpzxadd %r10, %r9, -256(,%rbp,2) -# INTEL: cmpzxadd qword ptr [2*rbp - 256], r9, r10 +# ATT: cmpexadd %r10, %r9, -256(,%rbp,2) +# INTEL: cmpexadd qword ptr [2*rbp - 256], r9, r10 0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff -# ATT: cmpzxadd %r10, %r9, 1016(%rcx) -# INTEL: cmpzxadd qword ptr [rcx + 1016], r9, r10 +# ATT: cmpexadd %r10, %r9, 1016(%rcx) +# INTEL: cmpexadd qword ptr [rcx + 1016], r9, r10 0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00 -# ATT: cmpzxadd %r10, %r9, -1024(%rdx) -# INTEL: cmpzxadd qword ptr [rdx - 1024], r9, r10 +# ATT: cmpexadd %r10, %r9, -1024(%rdx) +# INTEL: cmpexadd qword ptr [rdx - 1024], r9, r10 0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff # ATT: cmpbexadd %ecx, %r8d, (%rip) diff --git a/llvm/test/MC/X86/apx/cmpccxadd-att.s b/llvm/test/MC/X86/apx/cmpccxadd-att.s index d6ade869ca1d26..544871274a41d1 100644 --- a/llvm/test/MC/X86/apx/cmpccxadd-att.s +++ b/llvm/test/MC/X86/apx/cmpccxadd-att.s @@ -3,21 +3,21 @@ # ERROR-COUNT-60: error: # ERROR-NOT: error: -# CHECK: {evex} cmpnbexadd %ecx, %edx, 123(%eax,%ebx,4) +# CHECK: {evex} cmpaxadd %ecx, %edx, 123(%eax,%ebx,4) # CHECK: encoding: [0x67,0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b] - {evex} cmpnbexadd %ecx, %edx, 123(%eax,%ebx,4) + {evex} cmpaxadd %ecx, %edx, 123(%eax,%ebx,4) -# CHECK: {evex} cmpnbexadd %r9, %r15, 123(%rax,%rbx,4) +# CHECK: {evex} cmpaxadd %r9, %r15, 123(%rax,%rbx,4) # CHECK: encoding: 
[0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b] - {evex} cmpnbexadd %r9, %r15, 123(%rax,%rbx,4) + {evex} cmpaxadd %r9, %r15, 123(%rax,%rbx,4) -# CHECK: cmpnbexadd %r18d, %r22d, 291(%r28,%r29,4) +# CHECK: cmpaxadd %r18d, %r22d, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnbexadd %r18d, %r22d, 291(%r28,%r29,4) + cmpaxadd %r18d, %r22d, 291(%r28,%r29,4) -# CHECK: cmpnbexadd %r19, %r23, 291(%r28,%r29,4) +# CHECK: cmpaxadd %r19, %r23, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnbexadd %r19, %r23, 291(%r28,%r29,4) + cmpaxadd %r19, %r23, 291(%r28,%r29,4) # CHECK: {evex} cmpbexadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b] @@ -51,53 +51,53 @@ # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00] cmpbxadd %r19, %r23, 291(%r28,%r29,4) -# CHECK: {evex} cmpzxadd %ecx, %edx, 123(%rax,%rbx,4) +# CHECK: {evex} cmpexadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b] - {evex} cmpzxadd %ecx, %edx, 123(%rax,%rbx,4) + {evex} cmpexadd %ecx, %edx, 123(%rax,%rbx,4) -# CHECK: {evex} cmpzxadd %r9, %r15, 123(%rax,%rbx,4) +# CHECK: {evex} cmpexadd %r9, %r15, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b] - {evex} cmpzxadd %r9, %r15, 123(%rax,%rbx,4) + {evex} cmpexadd %r9, %r15, 123(%rax,%rbx,4) -# CHECK: cmpzxadd %r18d, %r22d, 291(%r28,%r29,4) +# CHECK: cmpexadd %r18d, %r22d, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpzxadd %r18d, %r22d, 291(%r28,%r29,4) + cmpexadd %r18d, %r22d, 291(%r28,%r29,4) -# CHECK: cmpzxadd %r19, %r23, 291(%r28,%r29,4) +# CHECK: cmpexadd %r19, %r23, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpzxadd %r19, %r23, 291(%r28,%r29,4) + cmpexadd %r19, %r23, 291(%r28,%r29,4) -# CHECK: {evex} cmpnlxadd %ecx, %edx, 123(%rax,%rbx,4) 
+# CHECK: {evex} cmpgexadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b] - {evex} cmpnlxadd %ecx, %edx, 123(%rax,%rbx,4) + {evex} cmpgexadd %ecx, %edx, 123(%rax,%rbx,4) -# CHECK: {evex} cmpnlxadd %r9, %r15, 123(%rax,%rbx,4) +# CHECK: {evex} cmpgexadd %r9, %r15, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b] - {evex} cmpnlxadd %r9, %r15, 123(%rax,%rbx,4) + {evex} cmpgexadd %r9, %r15, 123(%rax,%rbx,4) -# CHECK: cmpnlxadd %r18d, %r22d, 291(%r28,%r29,4) +# CHECK: cmpgexadd %r18d, %r22d, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnlxadd %r18d, %r22d, 291(%r28,%r29,4) + cmpgexadd %r18d, %r22d, 291(%r28,%r29,4) -# CHECK: cmpnlxadd %r19, %r23, 291(%r28,%r29,4) +# CHECK: cmpgexadd %r19, %r23, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnlxadd %r19, %r23, 291(%r28,%r29,4) + cmpgexadd %r19, %r23, 291(%r28,%r29,4) -# CHECK: {evex} cmpnlexadd %ecx, %edx, 123(%rax,%rbx,4) +# CHECK: {evex} cmpgxadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b] - {evex} cmpnlexadd %ecx, %edx, 123(%rax,%rbx,4) + {evex} cmpgxadd %ecx, %edx, 123(%rax,%rbx,4) -# CHECK: {evex} cmpnlexadd %r9, %r15, 123(%rax,%rbx,4) +# CHECK: {evex} cmpgxadd %r9, %r15, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b] - {evex} cmpnlexadd %r9, %r15, 123(%rax,%rbx,4) + {evex} cmpgxadd %r9, %r15, 123(%rax,%rbx,4) -# CHECK: cmpnlexadd %r18d, %r22d, 291(%r28,%r29,4) +# CHECK: cmpgxadd %r18d, %r22d, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnlexadd %r18d, %r22d, 291(%r28,%r29,4) + cmpgxadd %r18d, %r22d, 291(%r28,%r29,4) -# CHECK: cmpnlexadd %r19, %r23, 291(%r28,%r29,4) +# CHECK: cmpgxadd %r19, %r23, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnlexadd %r19, %r23, 
291(%r28,%r29,4) + cmpgxadd %r19, %r23, 291(%r28,%r29,4) # CHECK: {evex} cmplexadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b] @@ -131,21 +131,21 @@ # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00] cmplxadd %r19, %r23, 291(%r28,%r29,4) -# CHECK: {evex} cmpnzxadd %ecx, %edx, 123(%rax,%rbx,4) +# CHECK: {evex} cmpnexadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b] - {evex} cmpnzxadd %ecx, %edx, 123(%rax,%rbx,4) + {evex} cmpnexadd %ecx, %edx, 123(%rax,%rbx,4) -# CHECK: {evex} cmpnzxadd %r9, %r15, 123(%rax,%rbx,4) +# CHECK: {evex} cmpnexadd %r9, %r15, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b] - {evex} cmpnzxadd %r9, %r15, 123(%rax,%rbx,4) + {evex} cmpnexadd %r9, %r15, 123(%rax,%rbx,4) -# CHECK: cmpnzxadd %r18d, %r22d, 291(%r28,%r29,4) +# CHECK: cmpnexadd %r18d, %r22d, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnzxadd %r18d, %r22d, 291(%r28,%r29,4) + cmpnexadd %r18d, %r22d, 291(%r28,%r29,4) -# CHECK: cmpnzxadd %r19, %r23, 291(%r28,%r29,4) +# CHECK: cmpnexadd %r19, %r23, 291(%r28,%r29,4) # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnzxadd %r19, %r23, 291(%r28,%r29,4) + cmpnexadd %r19, %r23, 291(%r28,%r29,4) # CHECK: {evex} cmpnoxadd %ecx, %edx, 123(%rax,%rbx,4) # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b] diff --git a/llvm/test/MC/X86/apx/cmpccxadd-intel.s b/llvm/test/MC/X86/apx/cmpccxadd-intel.s index 4c44968fbf91ce..cace33e59d6a74 100644 --- a/llvm/test/MC/X86/apx/cmpccxadd-intel.s +++ b/llvm/test/MC/X86/apx/cmpccxadd-intel.s @@ -1,20 +1,20 @@ # RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s -# CHECK: {evex} cmpnbexadd dword ptr [rax + 4*rbx + 123], edx, ecx +# CHECK: {evex} cmpaxadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: 
[0x62,0xf2,0x75,0x08,0xe7,0x54,0x98,0x7b] - {evex} cmpnbexadd dword ptr [rax + 4*rbx + 123], edx, ecx + {evex} cmpaxadd dword ptr [rax + 4*rbx + 123], edx, ecx -# CHECK: {evex} cmpnbexadd qword ptr [rax + 4*rbx + 123], r15, r9 +# CHECK: {evex} cmpaxadd qword ptr [rax + 4*rbx + 123], r15, r9 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe7,0x7c,0x98,0x7b] - {evex} cmpnbexadd qword ptr [rax + 4*rbx + 123], r15, r9 + {evex} cmpaxadd qword ptr [rax + 4*rbx + 123], r15, r9 -# CHECK: cmpnbexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# CHECK: cmpaxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe7,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnbexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d + cmpaxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d -# CHECK: cmpnbexadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# CHECK: cmpaxadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe7,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnbexadd qword ptr [r28 + 4*r29 + 291], r23, r19 + cmpaxadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: {evex} cmpbexadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe6,0x54,0x98,0x7b] @@ -48,53 +48,53 @@ # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe2,0xbc,0xac,0x23,0x01,0x00,0x00] cmpbxadd qword ptr [r28 + 4*r29 + 291], r23, r19 -# CHECK: {evex} cmpzxadd dword ptr [rax + 4*rbx + 123], edx, ecx +# CHECK: {evex} cmpexadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe4,0x54,0x98,0x7b] - {evex} cmpzxadd dword ptr [rax + 4*rbx + 123], edx, ecx + {evex} cmpexadd dword ptr [rax + 4*rbx + 123], edx, ecx -# CHECK: {evex} cmpzxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# CHECK: {evex} cmpexadd qword ptr [rax + 4*rbx + 123], r15, r9 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe4,0x7c,0x98,0x7b] - {evex} cmpzxadd qword ptr [rax + 4*rbx + 123], r15, r9 + {evex} cmpexadd qword ptr [rax + 4*rbx + 123], r15, r9 -# CHECK: cmpzxadd dword ptr [r28 + 
4*r29 + 291], r22d, r18d +# CHECK: cmpexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe4,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpzxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d + cmpexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d -# CHECK: cmpzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# CHECK: cmpexadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe4,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 + cmpexadd qword ptr [r28 + 4*r29 + 291], r23, r19 -# CHECK: {evex} cmpnlxadd dword ptr [rax + 4*rbx + 123], edx, ecx +# CHECK: {evex} cmpgexadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xed,0x54,0x98,0x7b] - {evex} cmpnlxadd dword ptr [rax + 4*rbx + 123], edx, ecx + {evex} cmpgexadd dword ptr [rax + 4*rbx + 123], edx, ecx -# CHECK: {evex} cmpnlxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# CHECK: {evex} cmpgexadd qword ptr [rax + 4*rbx + 123], r15, r9 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xed,0x7c,0x98,0x7b] - {evex} cmpnlxadd qword ptr [rax + 4*rbx + 123], r15, r9 + {evex} cmpgexadd qword ptr [rax + 4*rbx + 123], r15, r9 -# CHECK: cmpnlxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# CHECK: cmpgexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xed,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnlxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d + cmpgexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d -# CHECK: cmpnlxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# CHECK: cmpgexadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xed,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnlxadd qword ptr [r28 + 4*r29 + 291], r23, r19 + cmpgexadd qword ptr [r28 + 4*r29 + 291], r23, r19 -# CHECK: {evex} cmpnlexadd dword ptr [rax + 4*rbx + 123], edx, ecx +# CHECK: {evex} cmpgxadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xef,0x54,0x98,0x7b] - {evex} 
cmpnlexadd dword ptr [rax + 4*rbx + 123], edx, ecx + {evex} cmpgxadd dword ptr [rax + 4*rbx + 123], edx, ecx -# CHECK: {evex} cmpnlexadd qword ptr [rax + 4*rbx + 123], r15, r9 +# CHECK: {evex} cmpgxadd qword ptr [rax + 4*rbx + 123], r15, r9 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xef,0x7c,0x98,0x7b] - {evex} cmpnlexadd qword ptr [rax + 4*rbx + 123], r15, r9 + {evex} cmpgxadd qword ptr [rax + 4*rbx + 123], r15, r9 -# CHECK: cmpnlexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# CHECK: cmpgxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xef,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnlexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d + cmpgxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d -# CHECK: cmpnlexadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# CHECK: cmpgxadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xef,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnlexadd qword ptr [r28 + 4*r29 + 291], r23, r19 + cmpgxadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: {evex} cmplexadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xee,0x54,0x98,0x7b] @@ -128,21 +128,21 @@ # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xec,0xbc,0xac,0x23,0x01,0x00,0x00] cmplxadd qword ptr [r28 + 4*r29 + 291], r23, r19 -# CHECK: {evex} cmpnzxadd dword ptr [rax + 4*rbx + 123], edx, ecx +# CHECK: {evex} cmpnexadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe5,0x54,0x98,0x7b] - {evex} cmpnzxadd dword ptr [rax + 4*rbx + 123], edx, ecx + {evex} cmpnexadd dword ptr [rax + 4*rbx + 123], edx, ecx -# CHECK: {evex} cmpnzxadd qword ptr [rax + 4*rbx + 123], r15, r9 +# CHECK: {evex} cmpnexadd qword ptr [rax + 4*rbx + 123], r15, r9 # CHECK: encoding: [0x62,0x72,0xb5,0x08,0xe5,0x7c,0x98,0x7b] - {evex} cmpnzxadd qword ptr [rax + 4*rbx + 123], r15, r9 + {evex} cmpnexadd qword ptr [rax + 4*rbx + 123], r15, r9 -# CHECK: cmpnzxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d +# CHECK: 
cmpnexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d # CHECK: encoding: [0x62,0x8a,0x69,0x00,0xe5,0xb4,0xac,0x23,0x01,0x00,0x00] - cmpnzxadd dword ptr [r28 + 4*r29 + 291], r22d, r18d + cmpnexadd dword ptr [r28 + 4*r29 + 291], r22d, r18d -# CHECK: cmpnzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 +# CHECK: cmpnexadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: encoding: [0x62,0x8a,0xe1,0x00,0xe5,0xbc,0xac,0x23,0x01,0x00,0x00] - cmpnzxadd qword ptr [r28 + 4*r29 + 291], r23, r19 + cmpnexadd qword ptr [r28 + 4*r29 + 291], r23, r19 # CHECK: {evex} cmpnoxadd dword ptr [rax + 4*rbx + 123], edx, ecx # CHECK: encoding: [0x62,0xf2,0x75,0x08,0xe1,0x54,0x98,0x7b] diff --git a/llvm/test/MC/X86/cmpccxadd-att-alias.s b/llvm/test/MC/X86/cmpccxadd-att-alias.s index dcc0f105d7abc1..46c6588740b9cd 100644 --- a/llvm/test/MC/X86/cmpccxadd-att-alias.s +++ b/llvm/test/MC/X86/cmpccxadd-att-alias.s @@ -1,28 +1,28 @@ // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s -// CHECK: cmpnbxadd %eax, %ecx, (%rip) +// CHECK: cmpaexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpaexadd %eax, %ecx, (%rip) + cmpnbxadd %eax, %ecx, (%rip) -// CHECK: cmpzxadd %eax, %ecx, (%rip) +// CHECK: cmpexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpexadd %eax, %ecx, (%rip) + cmpzxadd %eax, %ecx, (%rip) -// CHECK: cmpnzxadd %eax, %ecx, (%rip) +// CHECK: cmpnexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnexadd %eax, %ecx, (%rip) + cmpnzxadd %eax, %ecx, (%rip) -// CHECK: cmpnbexadd %eax, %ecx, (%rip) +// CHECK: cmpaxadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpaxadd %eax, %ecx, (%rip) + cmpnbexadd %eax, %ecx, (%rip) -// CHECK: cmpnlxadd %eax, %ecx, (%rip) +// CHECK: cmpgexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpgexadd %eax, %ecx, (%rip) + cmpnlxadd 
%eax, %ecx, (%rip) -// CHECK: cmpnlexadd %eax, %ecx, (%rip) +// CHECK: cmpgxadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpgxadd %eax, %ecx, (%rip) + cmpnlexadd %eax, %ecx, (%rip) // CHECK: cmpbxadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00] @@ -32,7 +32,7 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00] cmpnaexadd %eax, %ecx, (%rip) -// CHECK: cmpnbxadd %eax, %ecx, (%rip) +// CHECK: cmpaexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] cmpncxadd %eax, %ecx, (%rip) diff --git a/llvm/test/MC/X86/cmpccxadd-att.s b/llvm/test/MC/X86/cmpccxadd-att.s index c79cc55a15b81d..a7c9df91ab0c8e 100644 --- a/llvm/test/MC/X86/cmpccxadd-att.s +++ b/llvm/test/MC/X86/cmpccxadd-att.s @@ -196,197 +196,197 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff] cmplxadd %r10, %r9, -1024(%rdx) -// CHECK: cmpnbexadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpaxadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbexadd %eax, %ecx, 268435456(%rbp,%r14,8) + cmpaxadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpnbexadd %eax, %ecx, 291(%r8,%rax,4) +// CHECK: cmpaxadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbexadd %eax, %ecx, 291(%r8,%rax,4) + cmpaxadd %eax, %ecx, 291(%r8,%rax,4) -// CHECK: cmpnbexadd %eax, %ecx, (%rip) +// CHECK: cmpaxadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpnbexadd %eax, %ecx, (%rip) + cmpaxadd %eax, %ecx, (%rip) -// CHECK: cmpnbexadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpaxadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnbexadd %eax, %ecx, -128(,%rbp,2) + cmpaxadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpnbexadd %eax, %ecx, 508(%rcx) +// CHECK: cmpaxadd %eax, 
%ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00] - cmpnbexadd %eax, %ecx, 508(%rcx) + cmpaxadd %eax, %ecx, 508(%rcx) -// CHECK: cmpnbexadd %eax, %ecx, -512(%rdx) +// CHECK: cmpaxadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff] - cmpnbexadd %eax, %ecx, -512(%rdx) + cmpaxadd %eax, %ecx, -512(%rdx) -// CHECK: cmpnbexadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpaxadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbexadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpaxadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpnbexadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpaxadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbexadd %r10, %r9, 291(%r8,%rax,4) + cmpaxadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpnbexadd %r10, %r9, (%rip) +// CHECK: cmpaxadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpnbexadd %r10, %r9, (%rip) + cmpaxadd %r10, %r9, (%rip) -// CHECK: cmpnbexadd %r10, %r9, -256(,%rbp,2) +// CHECK: cmpaxadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnbexadd %r10, %r9, -256(,%rbp,2) + cmpaxadd %r10, %r9, -256(,%rbp,2) -// CHECK: cmpnbexadd %r10, %r9, 1016(%rcx) +// CHECK: cmpaxadd %r10, %r9, 1016(%rcx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00] - cmpnbexadd %r10, %r9, 1016(%rcx) + cmpaxadd %r10, %r9, 1016(%rcx) -// CHECK: cmpnbexadd %r10, %r9, -1024(%rdx) +// CHECK: cmpaxadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff] - cmpnbexadd %r10, %r9, -1024(%rdx) + cmpaxadd %r10, %r9, -1024(%rdx) -// CHECK: cmpnbxadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpaexadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbxadd %eax, %ecx, 
268435456(%rbp,%r14,8) + cmpaexadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpnbxadd %eax, %ecx, 291(%r8,%rax,4) +// CHECK: cmpaexadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbxadd %eax, %ecx, 291(%r8,%rax,4) + cmpaexadd %eax, %ecx, 291(%r8,%rax,4) -// CHECK: cmpnbxadd %eax, %ecx, (%rip) +// CHECK: cmpaexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpnbxadd %eax, %ecx, (%rip) + cmpaexadd %eax, %ecx, (%rip) -// CHECK: cmpnbxadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpaexadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnbxadd %eax, %ecx, -128(,%rbp,2) + cmpaexadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpnbxadd %eax, %ecx, 508(%rcx) +// CHECK: cmpaexadd %eax, %ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00] - cmpnbxadd %eax, %ecx, 508(%rcx) + cmpaexadd %eax, %ecx, 508(%rcx) -// CHECK: cmpnbxadd %eax, %ecx, -512(%rdx) +// CHECK: cmpaexadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff] - cmpnbxadd %eax, %ecx, -512(%rdx) + cmpaexadd %eax, %ecx, -512(%rdx) -// CHECK: cmpnbxadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpaexadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbxadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpaexadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpnbxadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpaexadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbxadd %r10, %r9, 291(%r8,%rax,4) + cmpaexadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpnbxadd %r10, %r9, (%rip) +// CHECK: cmpaexadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpnbxadd %r10, %r9, (%rip) + cmpaexadd %r10, %r9, (%rip) -// CHECK: cmpnbxadd %r10, %r9, -256(,%rbp,2) +// CHECK: 
cmpaexadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnbxadd %r10, %r9, -256(,%rbp,2) + cmpaexadd %r10, %r9, -256(,%rbp,2) -// CHECK: cmpnbxadd %r10, %r9, 1016(%rcx) +// CHECK: cmpaexadd %r10, %r9, 1016(%rcx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00] - cmpnbxadd %r10, %r9, 1016(%rcx) + cmpaexadd %r10, %r9, 1016(%rcx) -// CHECK: cmpnbxadd %r10, %r9, -1024(%rdx) +// CHECK: cmpaexadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff] - cmpnbxadd %r10, %r9, -1024(%rdx) + cmpaexadd %r10, %r9, -1024(%rdx) -// CHECK: cmpnlexadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpgxadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlexadd %eax, %ecx, 268435456(%rbp,%r14,8) + cmpgxadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpnlexadd %eax, %ecx, 291(%r8,%rax,4) +// CHECK: cmpgxadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlexadd %eax, %ecx, 291(%r8,%rax,4) + cmpgxadd %eax, %ecx, 291(%r8,%rax,4) -// CHECK: cmpnlexadd %eax, %ecx, (%rip) +// CHECK: cmpgxadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpnlexadd %eax, %ecx, (%rip) + cmpgxadd %eax, %ecx, (%rip) -// CHECK: cmpnlexadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpgxadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnlexadd %eax, %ecx, -128(,%rbp,2) + cmpgxadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpnlexadd %eax, %ecx, 508(%rcx) +// CHECK: cmpgxadd %eax, %ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00] - cmpnlexadd %eax, %ecx, 508(%rcx) + cmpgxadd %eax, %ecx, 508(%rcx) -// CHECK: cmpnlexadd %eax, %ecx, -512(%rdx) +// CHECK: cmpgxadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff] - cmpnlexadd 
%eax, %ecx, -512(%rdx) + cmpgxadd %eax, %ecx, -512(%rdx) -// CHECK: cmpnlexadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpgxadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlexadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpgxadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpnlexadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpgxadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlexadd %r10, %r9, 291(%r8,%rax,4) + cmpgxadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpnlexadd %r10, %r9, (%rip) +// CHECK: cmpgxadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpnlexadd %r10, %r9, (%rip) + cmpgxadd %r10, %r9, (%rip) -// CHECK: cmpnlexadd %r10, %r9, -256(,%rbp,2) +// CHECK: cmpgxadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnlexadd %r10, %r9, -256(,%rbp,2) + cmpgxadd %r10, %r9, -256(,%rbp,2) -// CHECK: cmpnlexadd %r10, %r9, 1016(%rcx) +// CHECK: cmpgxadd %r10, %r9, 1016(%rcx) // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00] - cmpnlexadd %r10, %r9, 1016(%rcx) + cmpgxadd %r10, %r9, 1016(%rcx) -// CHECK: cmpnlexadd %r10, %r9, -1024(%rdx) +// CHECK: cmpgxadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff] - cmpnlexadd %r10, %r9, -1024(%rdx) + cmpgxadd %r10, %r9, -1024(%rdx) -// CHECK: cmpnlxadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpgexadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlxadd %eax, %ecx, 268435456(%rbp,%r14,8) + cmpgexadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpnlxadd %eax, %ecx, 291(%r8,%rax,4) +// CHECK: cmpgexadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlxadd %eax, %ecx, 291(%r8,%rax,4) + cmpgexadd %eax, %ecx, 291(%r8,%rax,4) -// 
CHECK: cmpnlxadd %eax, %ecx, (%rip) +// CHECK: cmpgexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpnlxadd %eax, %ecx, (%rip) + cmpgexadd %eax, %ecx, (%rip) -// CHECK: cmpnlxadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpgexadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnlxadd %eax, %ecx, -128(,%rbp,2) + cmpgexadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpnlxadd %eax, %ecx, 508(%rcx) +// CHECK: cmpgexadd %eax, %ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00] - cmpnlxadd %eax, %ecx, 508(%rcx) + cmpgexadd %eax, %ecx, 508(%rcx) -// CHECK: cmpnlxadd %eax, %ecx, -512(%rdx) +// CHECK: cmpgexadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff] - cmpnlxadd %eax, %ecx, -512(%rdx) + cmpgexadd %eax, %ecx, -512(%rdx) -// CHECK: cmpnlxadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpgexadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlxadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpgexadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpnlxadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpgexadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlxadd %r10, %r9, 291(%r8,%rax,4) + cmpgexadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpnlxadd %r10, %r9, (%rip) +// CHECK: cmpgexadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpnlxadd %r10, %r9, (%rip) + cmpgexadd %r10, %r9, (%rip) -// CHECK: cmpnlxadd %r10, %r9, -256(,%rbp,2) +// CHECK: cmpgexadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnlxadd %r10, %r9, -256(,%rbp,2) + cmpgexadd %r10, %r9, -256(,%rbp,2) -// CHECK: cmpnlxadd %r10, %r9, 1016(%rcx) +// CHECK: cmpgexadd %r10, %r9, 1016(%rcx) // CHECK: encoding: 
[0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00] - cmpnlxadd %r10, %r9, 1016(%rcx) + cmpgexadd %r10, %r9, 1016(%rcx) -// CHECK: cmpnlxadd %r10, %r9, -1024(%rdx) +// CHECK: cmpgexadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff] - cmpnlxadd %r10, %r9, -1024(%rdx) + cmpgexadd %r10, %r9, -1024(%rdx) // CHECK: cmpnoxadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10] @@ -532,53 +532,53 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff] cmpnsxadd %r10, %r9, -1024(%rdx) -// CHECK: cmpnzxadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpnexadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnzxadd %eax, %ecx, 268435456(%rbp,%r14,8) + cmpnexadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpnzxadd %eax, %ecx, 291(%r8,%rax,4) +// CHECK: cmpnexadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnzxadd %eax, %ecx, 291(%r8,%rax,4) + cmpnexadd %eax, %ecx, 291(%r8,%rax,4) -// CHECK: cmpnzxadd %eax, %ecx, (%rip) +// CHECK: cmpnexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnzxadd %eax, %ecx, (%rip) + cmpnexadd %eax, %ecx, (%rip) -// CHECK: cmpnzxadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpnexadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnzxadd %eax, %ecx, -128(,%rbp,2) + cmpnexadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpnzxadd %eax, %ecx, 508(%rcx) +// CHECK: cmpnexadd %eax, %ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00] - cmpnzxadd %eax, %ecx, 508(%rcx) + cmpnexadd %eax, %ecx, 508(%rcx) -// CHECK: cmpnzxadd %eax, %ecx, -512(%rdx) +// CHECK: cmpnexadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff] - cmpnzxadd %eax, %ecx, -512(%rdx) + cmpnexadd %eax, 
%ecx, -512(%rdx) -// CHECK: cmpnzxadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpnexadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnzxadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpnexadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpnzxadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpnexadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnzxadd %r10, %r9, 291(%r8,%rax,4) + cmpnexadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpnzxadd %r10, %r9, (%rip) +// CHECK: cmpnexadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnzxadd %r10, %r9, (%rip) + cmpnexadd %r10, %r9, (%rip) -// CHECK: cmpnzxadd %r10, %r9, -256(,%rbp,2) +// CHECK: cmpnexadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnzxadd %r10, %r9, -256(,%rbp,2) + cmpnexadd %r10, %r9, -256(,%rbp,2) -// CHECK: cmpnzxadd %r10, %r9, 1016(%rcx) +// CHECK: cmpnexadd %r10, %r9, 1016(%rcx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00] - cmpnzxadd %r10, %r9, 1016(%rcx) + cmpnexadd %r10, %r9, 1016(%rcx) -// CHECK: cmpnzxadd %r10, %r9, -1024(%rdx) +// CHECK: cmpnexadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff] - cmpnzxadd %r10, %r9, -1024(%rdx) + cmpnexadd %r10, %r9, -1024(%rdx) // CHECK: cmpoxadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10] @@ -724,53 +724,53 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff] cmpsxadd %r10, %r9, -1024(%rdx) -// CHECK: cmpzxadd %eax, %ecx, 268435456(%rbp,%r14,8) +// CHECK: cmpexadd %eax, %ecx, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpzxadd %eax, %ecx, 268435456(%rbp,%r14,8) + cmpexadd %eax, %ecx, 268435456(%rbp,%r14,8) -// CHECK: cmpzxadd %eax, %ecx, 291(%r8,%rax,4) +// 
CHECK: cmpexadd %eax, %ecx, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpzxadd %eax, %ecx, 291(%r8,%rax,4) + cmpexadd %eax, %ecx, 291(%r8,%rax,4) -// CHECK: cmpzxadd %eax, %ecx, (%rip) +// CHECK: cmpexadd %eax, %ecx, (%rip) // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpzxadd %eax, %ecx, (%rip) + cmpexadd %eax, %ecx, (%rip) -// CHECK: cmpzxadd %eax, %ecx, -128(,%rbp,2) +// CHECK: cmpexadd %eax, %ecx, -128(,%rbp,2) // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpzxadd %eax, %ecx, -128(,%rbp,2) + cmpexadd %eax, %ecx, -128(,%rbp,2) -// CHECK: cmpzxadd %eax, %ecx, 508(%rcx) +// CHECK: cmpexadd %eax, %ecx, 508(%rcx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00] - cmpzxadd %eax, %ecx, 508(%rcx) + cmpexadd %eax, %ecx, 508(%rcx) -// CHECK: cmpzxadd %eax, %ecx, -512(%rdx) +// CHECK: cmpexadd %eax, %ecx, -512(%rdx) // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff] - cmpzxadd %eax, %ecx, -512(%rdx) + cmpexadd %eax, %ecx, -512(%rdx) -// CHECK: cmpzxadd %r10, %r9, 268435456(%rbp,%r14,8) +// CHECK: cmpexadd %r10, %r9, 268435456(%rbp,%r14,8) // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpzxadd %r10, %r9, 268435456(%rbp,%r14,8) + cmpexadd %r10, %r9, 268435456(%rbp,%r14,8) -// CHECK: cmpzxadd %r10, %r9, 291(%r8,%rax,4) +// CHECK: cmpexadd %r10, %r9, 291(%r8,%rax,4) // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpzxadd %r10, %r9, 291(%r8,%rax,4) + cmpexadd %r10, %r9, 291(%r8,%rax,4) -// CHECK: cmpzxadd %r10, %r9, (%rip) +// CHECK: cmpexadd %r10, %r9, (%rip) // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpzxadd %r10, %r9, (%rip) + cmpexadd %r10, %r9, (%rip) -// CHECK: cmpzxadd %r10, %r9, -256(,%rbp,2) +// CHECK: cmpexadd %r10, %r9, -256(,%rbp,2) // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpzxadd %r10, %r9, -256(,%rbp,2) + cmpexadd 
%r10, %r9, -256(,%rbp,2) -// CHECK: cmpzxadd %r10, %r9, 1016(%rcx) +// CHECK: cmpexadd %r10, %r9, 1016(%rcx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00] - cmpzxadd %r10, %r9, 1016(%rcx) + cmpexadd %r10, %r9, 1016(%rcx) -// CHECK: cmpzxadd %r10, %r9, -1024(%rdx) +// CHECK: cmpexadd %r10, %r9, -1024(%rdx) // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff] - cmpzxadd %r10, %r9, -1024(%rdx) + cmpexadd %r10, %r9, -1024(%rdx) // CHECK: cmpbexadd %ecx, %r8d, (%rip) // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00] diff --git a/llvm/test/MC/X86/cmpccxadd-intel-alias.s b/llvm/test/MC/X86/cmpccxadd-intel-alias.s index f5c7a6b6a2e0a5..6228d7fc67231d 100644 --- a/llvm/test/MC/X86/cmpccxadd-intel-alias.s +++ b/llvm/test/MC/X86/cmpccxadd-intel-alias.s @@ -1,28 +1,28 @@ // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s -// CHECK: cmpnbxadd dword ptr [rip], ecx, eax +// CHECK: cmpaexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpaexadd dword ptr [rip], ecx, eax + cmpnbxadd dword ptr [rip], ecx, eax -// CHECK: cmpzxadd dword ptr [rip], ecx, eax +// CHECK: cmpexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpexadd dword ptr [rip], ecx, eax + cmpzxadd dword ptr [rip], ecx, eax -// CHECK: cmpnzxadd dword ptr [rip], ecx, eax +// CHECK: cmpnexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnexadd dword ptr [rip], ecx, eax + cmpnzxadd dword ptr [rip], ecx, eax -// CHECK: cmpnbexadd dword ptr [rip], ecx, eax +// CHECK: cmpaxadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpaxadd dword ptr [rip], ecx, eax + cmpnbexadd dword ptr [rip], ecx, eax -// CHECK: cmpnlxadd dword ptr [rip], ecx, eax +// CHECK: cmpgexadd dword ptr [rip], ecx, eax // CHECK: encoding: 
[0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpgexadd dword ptr [rip], ecx, eax + cmpnlxadd dword ptr [rip], ecx, eax -// CHECK: cmpnlexadd dword ptr [rip], ecx, eax +// CHECK: cmpgxadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpgxadd dword ptr [rip], ecx, eax + cmpnlexadd dword ptr [rip], ecx, eax // CHECK: cmpbxadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00] @@ -32,7 +32,7 @@ // CHECK: encoding: [0xc4,0xe2,0x79,0xe2,0x0d,0x00,0x00,0x00,0x00] cmpnaexadd dword ptr [rip], ecx, eax -// CHECK: cmpnbxadd dword ptr [rip], ecx, eax +// CHECK: cmpaexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] cmpncxadd dword ptr [rip], ecx, eax diff --git a/llvm/test/MC/X86/cmpccxadd-intel.s b/llvm/test/MC/X86/cmpccxadd-intel.s index c03873e34decea..af7c6c3b61c949 100644 --- a/llvm/test/MC/X86/cmpccxadd-intel.s +++ b/llvm/test/MC/X86/cmpccxadd-intel.s @@ -192,197 +192,197 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xec,0x8a,0x00,0xfc,0xff,0xff] cmplxadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpaxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax -// CHECK: cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbexadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpaxadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpnbexadd dword ptr [rip], ecx, eax +// CHECK: cmpaxadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpnbexadd dword ptr [rip], ecx, eax + cmpaxadd dword ptr [rip], ecx, eax -// CHECK: 
cmpnbexadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpaxadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnbexadd dword ptr [2*rbp - 128], ecx, eax + cmpaxadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpnbexadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpaxadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x89,0xfc,0x01,0x00,0x00] - cmpnbexadd dword ptr [rcx + 508], ecx, eax + cmpaxadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpnbexadd dword ptr [rdx - 512], ecx, eax +// CHECK: cmpaxadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe7,0x8a,0x00,0xfe,0xff,0xff] - cmpnbexadd dword ptr [rdx - 512], ecx, eax + cmpaxadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xe7,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 + cmpaxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xe7,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbexadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpaxadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpnbexadd qword ptr [rip], r9, r10 +// CHECK: cmpaxadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0d,0x00,0x00,0x00,0x00] - cmpnbexadd qword ptr [rip], r9, r10 + cmpaxadd qword ptr [rip], r9, r10 -// CHECK: cmpnbexadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpaxadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnbexadd qword ptr [2*rbp - 256], r9, r10 + cmpaxadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpnbexadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpaxadd qword ptr [rcx + 1016], r9, 
r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x89,0xf8,0x03,0x00,0x00] - cmpnbexadd qword ptr [rcx + 1016], r9, r10 + cmpaxadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpnbexadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpaxadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe7,0x8a,0x00,0xfc,0xff,0xff] - cmpnbexadd qword ptr [rdx - 1024], r9, r10 + cmpaxadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpaexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax -// CHECK: cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbxadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpaexadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpnbxadd dword ptr [rip], ecx, eax +// CHECK: cmpaexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpnbxadd dword ptr [rip], ecx, eax + cmpaexadd dword ptr [rip], ecx, eax -// CHECK: cmpnbxadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpaexadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnbxadd dword ptr [2*rbp - 128], ecx, eax + cmpaexadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpnbxadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpaexadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x89,0xfc,0x01,0x00,0x00] - cmpnbxadd dword ptr [rcx + 508], ecx, eax + cmpaexadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpnbxadd dword ptr [rdx - 512], ecx, eax +// CHECK: cmpaexadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe3,0x8a,0x00,0xfe,0xff,0xff] - cmpnbxadd dword 
ptr [rdx - 512], ecx, eax + cmpaexadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xe3,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnbxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 + cmpaexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xe3,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnbxadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpaexadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpnbxadd qword ptr [rip], r9, r10 +// CHECK: cmpaexadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0d,0x00,0x00,0x00,0x00] - cmpnbxadd qword ptr [rip], r9, r10 + cmpaexadd qword ptr [rip], r9, r10 -// CHECK: cmpnbxadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpaexadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnbxadd qword ptr [2*rbp - 256], r9, r10 + cmpaexadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpnbxadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpaexadd qword ptr [rcx + 1016], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x89,0xf8,0x03,0x00,0x00] - cmpnbxadd qword ptr [rcx + 1016], r9, r10 + cmpaexadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpnbxadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpaexadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe3,0x8a,0x00,0xfc,0xff,0xff] - cmpnbxadd qword ptr [rdx - 1024], r9, r10 + cmpaexadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpgxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpgxadd dword ptr [rbp + 
8*r14 + 268435456], ecx, eax -// CHECK: cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xef,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlexadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpgxadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpnlexadd dword ptr [rip], ecx, eax +// CHECK: cmpgxadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpnlexadd dword ptr [rip], ecx, eax + cmpgxadd dword ptr [rip], ecx, eax -// CHECK: cmpnlexadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpgxadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnlexadd dword ptr [2*rbp - 128], ecx, eax + cmpgxadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpnlexadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpgxadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x89,0xfc,0x01,0x00,0x00] - cmpnlexadd dword ptr [rcx + 508], ecx, eax + cmpgxadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpnlexadd dword ptr [rdx - 512], ecx, eax +// CHECK: cmpgxadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xef,0x8a,0x00,0xfe,0xff,0xff] - cmpnlexadd dword ptr [rdx - 512], ecx, eax + cmpgxadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xef,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 + cmpgxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xef,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlexadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpgxadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpnlexadd qword ptr 
[rip], r9, r10 +// CHECK: cmpgxadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0d,0x00,0x00,0x00,0x00] - cmpnlexadd qword ptr [rip], r9, r10 + cmpgxadd qword ptr [rip], r9, r10 -// CHECK: cmpnlexadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpgxadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnlexadd qword ptr [2*rbp - 256], r9, r10 + cmpgxadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpnlexadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpgxadd qword ptr [rcx + 1016], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x89,0xf8,0x03,0x00,0x00] - cmpnlexadd qword ptr [rcx + 1016], r9, r10 + cmpgxadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpnlexadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpgxadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xef,0x8a,0x00,0xfc,0xff,0xff] - cmpnlexadd qword ptr [rdx - 1024], r9, r10 + cmpgxadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpgexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax -// CHECK: cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xed,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlxadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpgexadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpnlxadd dword ptr [rip], ecx, eax +// CHECK: cmpgexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpnlxadd dword ptr [rip], ecx, eax + cmpgexadd dword ptr [rip], ecx, eax -// CHECK: cmpnlxadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpgexadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: 
[0xc4,0xe2,0x79,0xed,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnlxadd dword ptr [2*rbp - 128], ecx, eax + cmpgexadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpnlxadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpgexadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x89,0xfc,0x01,0x00,0x00] - cmpnlxadd dword ptr [rcx + 508], ecx, eax + cmpgexadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpnlxadd dword ptr [rdx - 512], ecx, eax +// CHECK: cmpgexadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xed,0x8a,0x00,0xfe,0xff,0xff] - cmpnlxadd dword ptr [rdx - 512], ecx, eax + cmpgexadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xed,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnlxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 + cmpgexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xed,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnlxadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpgexadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpnlxadd qword ptr [rip], r9, r10 +// CHECK: cmpgexadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0d,0x00,0x00,0x00,0x00] - cmpnlxadd qword ptr [rip], r9, r10 + cmpgexadd qword ptr [rip], r9, r10 -// CHECK: cmpnlxadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpgexadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnlxadd qword ptr [2*rbp - 256], r9, r10 + cmpgexadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpnlxadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpgexadd qword ptr [rcx + 1016], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x89,0xf8,0x03,0x00,0x00] - cmpnlxadd qword ptr [rcx + 1016], r9, r10 + 
cmpgexadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpnlxadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpgexadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xed,0x8a,0x00,0xfc,0xff,0xff] - cmpnlxadd qword ptr [rdx - 1024], r9, r10 + cmpgexadd qword ptr [rdx - 1024], r9, r10 // CHECK: cmpnoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe1,0x8c,0xf5,0x00,0x00,0x00,0x10] @@ -528,53 +528,53 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xe9,0x8a,0x00,0xfc,0xff,0xff] cmpnsxadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpnexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax -// CHECK: cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnzxadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpnexadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpnzxadd dword ptr [rip], ecx, eax +// CHECK: cmpnexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnzxadd dword ptr [rip], ecx, eax + cmpnexadd dword ptr [rip], ecx, eax -// CHECK: cmpnzxadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpnexadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpnzxadd dword ptr [2*rbp - 128], ecx, eax + cmpnexadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpnzxadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpnexadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x89,0xfc,0x01,0x00,0x00] - cmpnzxadd dword ptr [rcx + 508], ecx, eax + cmpnexadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpnzxadd dword ptr [rdx - 
512], ecx, eax +// CHECK: cmpnexadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe5,0x8a,0x00,0xfe,0xff,0xff] - cmpnzxadd dword ptr [rdx - 512], ecx, eax + cmpnexadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xe5,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpnzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 + cmpnexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xe5,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpnzxadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpnexadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpnzxadd qword ptr [rip], r9, r10 +// CHECK: cmpnexadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0d,0x00,0x00,0x00,0x00] - cmpnzxadd qword ptr [rip], r9, r10 + cmpnexadd qword ptr [rip], r9, r10 -// CHECK: cmpnzxadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpnexadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpnzxadd qword ptr [2*rbp - 256], r9, r10 + cmpnexadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpnzxadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpnexadd qword ptr [rcx + 1016], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x89,0xf8,0x03,0x00,0x00] - cmpnzxadd qword ptr [rcx + 1016], r9, r10 + cmpnexadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpnzxadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpnexadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe5,0x8a,0x00,0xfc,0xff,0xff] - cmpnzxadd qword ptr [rdx - 1024], r9, r10 + cmpnexadd qword ptr [rdx - 1024], r9, r10 // CHECK: cmpoxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe0,0x8c,0xf5,0x00,0x00,0x00,0x10] @@ 
-720,53 +720,53 @@ // CHECK: encoding: [0xc4,0x62,0xa9,0xe8,0x8a,0x00,0xfc,0xff,0xff] cmpsxadd qword ptr [rdx - 1024], r9, r10 -// CHECK: cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax +// CHECK: cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax // CHECK: encoding: [0xc4,0xa2,0x79,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpzxadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax + cmpexadd dword ptr [rbp + 8*r14 + 268435456], ecx, eax -// CHECK: cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax +// CHECK: cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax // CHECK: encoding: [0xc4,0xc2,0x79,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpzxadd dword ptr [r8 + 4*rax + 291], ecx, eax + cmpexadd dword ptr [r8 + 4*rax + 291], ecx, eax -// CHECK: cmpzxadd dword ptr [rip], ecx, eax +// CHECK: cmpexadd dword ptr [rip], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpzxadd dword ptr [rip], ecx, eax + cmpexadd dword ptr [rip], ecx, eax -// CHECK: cmpzxadd dword ptr [2*rbp - 128], ecx, eax +// CHECK: cmpexadd dword ptr [2*rbp - 128], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x0c,0x6d,0x80,0xff,0xff,0xff] - cmpzxadd dword ptr [2*rbp - 128], ecx, eax + cmpexadd dword ptr [2*rbp - 128], ecx, eax -// CHECK: cmpzxadd dword ptr [rcx + 508], ecx, eax +// CHECK: cmpexadd dword ptr [rcx + 508], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x89,0xfc,0x01,0x00,0x00] - cmpzxadd dword ptr [rcx + 508], ecx, eax + cmpexadd dword ptr [rcx + 508], ecx, eax -// CHECK: cmpzxadd dword ptr [rdx - 512], ecx, eax +// CHECK: cmpexadd dword ptr [rdx - 512], ecx, eax // CHECK: encoding: [0xc4,0xe2,0x79,0xe4,0x8a,0x00,0xfe,0xff,0xff] - cmpzxadd dword ptr [rdx - 512], ecx, eax + cmpexadd dword ptr [rdx - 512], ecx, eax -// CHECK: cmpzxadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 +// CHECK: cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 // CHECK: encoding: [0xc4,0x22,0xa9,0xe4,0x8c,0xf5,0x00,0x00,0x00,0x10] - cmpzxadd qword ptr [rbp + 8*r14 + 
268435456], r9, r10 + cmpexadd qword ptr [rbp + 8*r14 + 268435456], r9, r10 -// CHECK: cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10 +// CHECK: cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10 // CHECK: encoding: [0xc4,0x42,0xa9,0xe4,0x8c,0x80,0x23,0x01,0x00,0x00] - cmpzxadd qword ptr [r8 + 4*rax + 291], r9, r10 + cmpexadd qword ptr [r8 + 4*rax + 291], r9, r10 -// CHECK: cmpzxadd qword ptr [rip], r9, r10 +// CHECK: cmpexadd qword ptr [rip], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0d,0x00,0x00,0x00,0x00] - cmpzxadd qword ptr [rip], r9, r10 + cmpexadd qword ptr [rip], r9, r10 -// CHECK: cmpzxadd qword ptr [2*rbp - 256], r9, r10 +// CHECK: cmpexadd qword ptr [2*rbp - 256], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x0c,0x6d,0x00,0xff,0xff,0xff] - cmpzxadd qword ptr [2*rbp - 256], r9, r10 + cmpexadd qword ptr [2*rbp - 256], r9, r10 -// CHECK: cmpzxadd qword ptr [rcx + 1016], r9, r10 +// CHECK: cmpexadd qword ptr [rcx + 1016], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x89,0xf8,0x03,0x00,0x00] - cmpzxadd qword ptr [rcx + 1016], r9, r10 + cmpexadd qword ptr [rcx + 1016], r9, r10 -// CHECK: cmpzxadd qword ptr [rdx - 1024], r9, r10 +// CHECK: cmpexadd qword ptr [rdx - 1024], r9, r10 // CHECK: encoding: [0xc4,0x62,0xa9,0xe4,0x8a,0x00,0xfc,0xff,0xff] - cmpzxadd qword ptr [rdx - 1024], r9, r10 + cmpexadd qword ptr [rdx - 1024], r9, r10 // CHECK: cmpbexadd dword ptr [rip], r8d, ecx // CHECK: encoding: [0xc4,0x62,0x71,0xe6,0x05,0x00,0x00,0x00,0x00] From 1e34706232e5f2865ff918ba8e9f840f38cdef07 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Thu, 15 Aug 2024 14:30:00 +0800 Subject: [PATCH 30/47] [mlir][tosa] Add verifier for `tosa.table` (#103708) This patch adds a verifier to `tosa.table` which fixes a crash. Fix #103086. 
--- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 2 ++ mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 23 +++++++++++++++++ mlir/test/Dialect/Tosa/invalid.mlir | 27 ++++++++++++++++++++ 3 files changed, 52 insertions(+) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 7871b46724a03d..0be0f8ef2d7a0c 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -897,6 +897,8 @@ def Tosa_TableOp : Tosa_InferShapedTypeOp<"table"> { let assemblyFormat = [{ $input `,` $table attr-dict `:` `(` type($input) `,` type($table) `)` `->` type($output) }]; + + let hasVerifier = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 39ea7a5b61f5ec..d4e49b6e3c044c 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -864,6 +864,29 @@ LogicalResult tosa::TableOp::inferReturnTypeComponents( return success(); } +LogicalResult tosa::TableOp::verify() { + TensorType inputType = getInput().getType(); + TensorType outputType = getOutput().getType(); + + if (inputType.hasRank() && outputType.hasRank() && + inputType.getRank() != outputType.getRank()) + return emitOpError() + << "expected input tensor rank to equal result tensor rank"; + + auto inputDims = inputType.getShape(); + auto outputDims = outputType.getShape(); + for (auto it : llvm::enumerate(llvm::zip(inputDims, outputDims))) { + int64_t dim = it.index(); + auto [inputDim, outputDim] = it.value(); + if (!ShapedType::isDynamic(outputDim) && outputDim != inputDim) { + return emitOpError() << "dim(result, " << dim << ") = " << outputDim + << " doesn't match dim(input, " << dim + << ") = " << inputDim; + } + } + return success(); +} + LogicalResult tosa::TileOp::inferReturnTypeComponents( MLIRContext *context, ::std::optional location, TileOp::Adaptor adaptor, diff 
--git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index e1fcf056480083..e723aef3815ce6 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -448,3 +448,30 @@ func.func @test_large_constant_permutation() { %3 = tosa.transpose %2, %1 : (tensor, tensor<2xi32>) -> tensor return } + +// ----- + +// CHECK-LABEL: test_table_rank0_table +func.func @test_table_rank0_table(%arg0: tensor<64xi16>, %arg1: tensor) { + // expected-error@+1 {{'tosa.table' op operand #1 must be 1-d tensor, but got 'tensor'}} + %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor) -> tensor<64xi16> + return +} + +// ----- + +// CHECK-LABEL: test_table_io_rank_mismatch +func.func @test_table_io_rank_mismatch(%arg0: tensor<64xi16>, %arg1: tensor<6xi16>) { + // expected-error@+1 {{'tosa.table' op expected input tensor rank to equal result tensor rank}} + %0 = tosa.table %arg0, %arg1 : (tensor<64xi16>, tensor<6xi16>) -> tensor<64x?xi16> + return +} + +// ----- + +// CHECK-LABEL: test_table_io_shape_mismatch +func.func @test_table_io_shape_mismatch(%arg0: tensor, %arg1: tensor<6xi16>) { + // expected-error@+1 {{'tosa.table' op dim(result, 1) = 15 doesn't match dim(input, 1) = 16}} + %0 = tosa.table %arg0, %arg1 : (tensor, tensor<6xi16>) -> tensor + return +} From 3eaf483c296bd95411bc855674707f289790e2a2 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 15 Aug 2024 08:43:43 +0200 Subject: [PATCH 31/47] [include-cleaner] Remove two commented-out lines of code. --- clang-tools-extra/include-cleaner/lib/WalkAST.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp index f7cc9d19123635..b15d428326ac12 100644 --- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp +++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp @@ -271,7 +271,6 @@ class ASTWalker : public RecursiveASTVisitor { // specialized template. 
Implicit ones are filtered out by RAV. bool VisitClassTemplateSpecializationDecl(ClassTemplateSpecializationDecl *CTSD) { - // if (CTSD->isExplicitSpecialization()) if (clang::isTemplateExplicitInstantiationOrSpecialization( CTSD->getTemplateSpecializationKind())) report(CTSD->getLocation(), @@ -279,7 +278,6 @@ class ASTWalker : public RecursiveASTVisitor { return true; } bool VisitVarTemplateSpecializationDecl(VarTemplateSpecializationDecl *VTSD) { - // if (VTSD->isExplicitSpecialization()) if (clang::isTemplateExplicitInstantiationOrSpecialization( VTSD->getTemplateSpecializationKind())) report(VTSD->getLocation(), From 12763a06526f5fee46d8d11953b1188bad9e7b0e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 15 Aug 2024 08:04:22 +0100 Subject: [PATCH 32/47] [VPlan] Move VPWidenStoreRecipe::execute to VPlanRecipes.cpp (NFC). Move VPWidenStoreRecipe::execute to VPlanRecipes.cpp in line with other ::execute implementations that don't depend on anything defined in LoopVectorization.cpp --- .../Transforms/Vectorize/LoopVectorize.cpp | 40 ------------------ .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 42 +++++++++++++++++++ 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index f1bb96a38cfaa9..fdf8f7042c4fb8 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9363,46 +9363,6 @@ void VPWidenLoadEVLRecipe::execute(VPTransformState &State) { State.set(this, Res, 0); } -void VPWidenStoreRecipe::execute(VPTransformState &State) { - auto *SI = cast(&Ingredient); - - VPValue *StoredVPValue = getStoredValue(); - bool CreateScatter = !isConsecutive(); - const Align Alignment = getLoadStoreAlignment(&Ingredient); - - auto &Builder = State.Builder; - State.setDebugLocFrom(getDebugLoc()); - - for (unsigned Part = 0; Part < State.UF; ++Part) { - Instruction *NewSI = nullptr; - Value *Mask = 
nullptr; - if (auto *VPMask = getMask()) { - // Mask reversal is only needed for non-all-one (null) masks, as reverse - // of a null all-one mask is a null mask. - Mask = State.get(VPMask, Part); - if (isReverse()) - Mask = Builder.CreateVectorReverse(Mask, "reverse"); - } - - Value *StoredVal = State.get(StoredVPValue, Part); - if (isReverse()) { - // If we store to reverse consecutive memory locations, then we need - // to reverse the order of elements in the stored value. - StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); - // We don't want to update the value in the map as it might be used in - // another expression. So don't call resetVectorValue(StoredVal). - } - Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter); - if (CreateScatter) - NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask); - else if (Mask) - NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask); - else - NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment); - State.addMetadata(NewSI, SI); - } -} - void VPWidenStoreEVLRecipe::execute(VPTransformState &State) { assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " "explicit vector length."); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 911b2fe9e9a1eb..bc57ea4d52471e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2066,7 +2066,49 @@ void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, O << " = vp.load "; printOperands(O, SlotTracker); } +#endif + +void VPWidenStoreRecipe::execute(VPTransformState &State) { + auto *SI = cast(&Ingredient); + + VPValue *StoredVPValue = getStoredValue(); + bool CreateScatter = !isConsecutive(); + const Align Alignment = getLoadStoreAlignment(&Ingredient); + + auto &Builder = State.Builder; + State.setDebugLocFrom(getDebugLoc()); + for (unsigned Part = 0; Part < State.UF; 
++Part) { + Instruction *NewSI = nullptr; + Value *Mask = nullptr; + if (auto *VPMask = getMask()) { + // Mask reversal is only needed for non-all-one (null) masks, as reverse + // of a null all-one mask is a null mask. + Mask = State.get(VPMask, Part); + if (isReverse()) + Mask = Builder.CreateVectorReverse(Mask, "reverse"); + } + + Value *StoredVal = State.get(StoredVPValue, Part); + if (isReverse()) { + // If we store to reverse consecutive memory locations, then we need + // to reverse the order of elements in the stored value. + StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse"); + // We don't want to update the value in the map as it might be used in + // another expression. So don't call resetVectorValue(StoredVal). + } + Value *Addr = State.get(getAddr(), Part, /*IsScalar*/ !CreateScatter); + if (CreateScatter) + NewSI = Builder.CreateMaskedScatter(StoredVal, Addr, Alignment, Mask); + else if (Mask) + NewSI = Builder.CreateMaskedStore(StoredVal, Addr, Alignment, Mask); + else + NewSI = Builder.CreateAlignedStore(StoredVal, Addr, Alignment); + State.addMetadata(NewSI, SI); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "WIDEN store "; From fa343be414f9364911b947f109f3df5539e23068 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 15 Aug 2024 15:56:33 +0900 Subject: [PATCH 33/47] Fix warnings in #102848 [-Wunused-but-set-variable] --- clang/lib/AST/MicrosoftMangle.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index a113574675b4c5..db8000e25dc7cc 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2969,6 +2969,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, if (const auto *AT = ResultType->getContainedAutoType()) { assert(AT->getKeyword() == 
AutoTypeKeyword::Auto && "should only need to mangle auto!"); + (void)AT; Out << '?'; mangleQualifiers(ResultType.getLocalQualifiers(), /*IsMember=*/false); Out << '?'; @@ -2987,7 +2988,7 @@ void MicrosoftCXXNameMangler::mangleFunctionType(const FunctionType *T, // SourceRange Range)` for details. auto UseClangMangling = [](QualType ResultType) { QualType T = ResultType; - while (const auto *PT = dyn_cast(T.getTypePtr())) { + while (isa(T.getTypePtr())) { T = T->getPointeeType(); if (T.getQualifiers().hasAddressSpace()) return true; From 845431a54fc2befacdfea27a852f003ad61ba720 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 15 Aug 2024 10:48:27 +0300 Subject: [PATCH 34/47] [UnitTests] Convert some data layout parsing tests to GTest (#104346) For now, the testcases are grouped in a single TEST. I'll sort them out and add more testcases in follow-up commits. --- ...talayout-invalid-function-ptr-alignment.ll | 5 - .../datalayout-invalid-i8-alignment.ll | 5 - ...alayout-invalid-stack-natural-alignment.ll | 5 - .../invalid-datalayout-alloca-addrspace.ll | 4 - .../invalid-datalayout-globals-addrspace.ll | 4 - .../invalid-datalayout-index-size.ll | 3 - .../invalid-datalayout-program-addrspace.ll | 4 - llvm/test/Assembler/invalid-datalayout1.ll | 3 - llvm/test/Assembler/invalid-datalayout10.ll | 3 - llvm/test/Assembler/invalid-datalayout11.ll | 3 - llvm/test/Assembler/invalid-datalayout12.ll | 3 - llvm/test/Assembler/invalid-datalayout13.ll | 3 - llvm/test/Assembler/invalid-datalayout14.ll | 3 - llvm/test/Assembler/invalid-datalayout15.ll | 3 - llvm/test/Assembler/invalid-datalayout16.ll | 3 - llvm/test/Assembler/invalid-datalayout17.ll | 3 - llvm/test/Assembler/invalid-datalayout18.ll | 3 - llvm/test/Assembler/invalid-datalayout19.ll | 6 - llvm/test/Assembler/invalid-datalayout2.ll | 3 - llvm/test/Assembler/invalid-datalayout20.ll | 6 - llvm/test/Assembler/invalid-datalayout21.ll | 6 - llvm/test/Assembler/invalid-datalayout22.ll | 6 - 
llvm/test/Assembler/invalid-datalayout23.ll | 6 - llvm/test/Assembler/invalid-datalayout24.ll | 6 - llvm/test/Assembler/invalid-datalayout3.ll | 3 - llvm/test/Assembler/invalid-datalayout4.ll | 3 - llvm/test/Assembler/invalid-datalayout5.ll | 3 - llvm/test/Assembler/invalid-datalayout6.ll | 3 - llvm/test/Assembler/invalid-datalayout7.ll | 3 - llvm/test/Assembler/invalid-datalayout8.ll | 3 - llvm/test/Assembler/invalid-datalayout9.ll | 3 - llvm/unittests/IR/DataLayoutTest.cpp | 105 ++++++++++++++++++ 32 files changed, 105 insertions(+), 120 deletions(-) delete mode 100644 llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll delete mode 100644 llvm/test/Assembler/datalayout-invalid-i8-alignment.ll delete mode 100644 llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout-index-size.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout-program-addrspace.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout1.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout10.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout11.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout12.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout13.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout14.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout15.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout16.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout17.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout18.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout19.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout2.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout20.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout21.ll delete 
mode 100644 llvm/test/Assembler/invalid-datalayout22.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout23.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout24.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout3.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout4.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout5.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout6.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout7.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout8.ll delete mode 100644 llvm/test/Assembler/invalid-datalayout9.ll diff --git a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll b/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll deleted file mode 100644 index 7c1e070c292d18..00000000000000 --- a/llvm/test/Assembler/datalayout-invalid-function-ptr-alignment.ll +++ /dev/null @@ -1,5 +0,0 @@ -; RUN: not llvm-as %s 2>&1 | FileCheck %s - -; CHECK: error: Alignment is neither 0 nor a power of 2 - -target datalayout = "Fi24" diff --git a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll b/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll deleted file mode 100644 index e12cfce0309746..00000000000000 --- a/llvm/test/Assembler/datalayout-invalid-i8-alignment.ll +++ /dev/null @@ -1,5 +0,0 @@ -; RUN: not llvm-as %s 2>&1 | FileCheck %s - -; CHECK: error: Invalid ABI alignment, i8 must be naturally aligned - -target datalayout = "i8:16" diff --git a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll b/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll deleted file mode 100644 index 1ccfb7832a50cc..00000000000000 --- a/llvm/test/Assembler/datalayout-invalid-stack-natural-alignment.ll +++ /dev/null @@ -1,5 +0,0 @@ -; RUN: not llvm-as %s 2>&1 | FileCheck %s - -; CHECK: error: Alignment is neither 0 nor a power of 2 - -target datalayout = "S24" diff --git a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll 
b/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll deleted file mode 100644 index f0407da73e4fc2..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout-alloca-addrspace.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "A16777216" -; CHECK: Invalid address space, must be a 24-bit integer diff --git a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll deleted file mode 100644 index 19bf77db329d2c..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout-globals-addrspace.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -; CHECK: Invalid address space, must be a 24-bit integer -target datalayout = "G16777216" diff --git a/llvm/test/Assembler/invalid-datalayout-index-size.ll b/llvm/test/Assembler/invalid-datalayout-index-size.ll deleted file mode 100644 index dc608cdd56a040..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout-index-size.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "p:64:64:64:128" -; CHECK: Index width cannot be larger than pointer width diff --git a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll b/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll deleted file mode 100644 index e636b75dee4d04..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout-program-addrspace.ll +++ /dev/null @@ -1,4 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -; CHECK: Invalid address space, must be a 24-bit integer -target datalayout = "P16777216" diff --git a/llvm/test/Assembler/invalid-datalayout1.ll b/llvm/test/Assembler/invalid-datalayout1.ll deleted file mode 100644 index d1befdcdf294d5..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout1.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "^" -; CHECK: Unknown specifier in datalayout string diff 
--git a/llvm/test/Assembler/invalid-datalayout10.ll b/llvm/test/Assembler/invalid-datalayout10.ll deleted file mode 100644 index 9f19688f852b4a..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout10.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "m" -; CHECK: Expected mangling specifier in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout11.ll b/llvm/test/Assembler/invalid-datalayout11.ll deleted file mode 100644 index f8fed8ff9ff339..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout11.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "m." -; CHECK: Unexpected trailing characters after mangling specifier in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout12.ll b/llvm/test/Assembler/invalid-datalayout12.ll deleted file mode 100644 index d79c196baab16f..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout12.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "f" -; CHECK: Missing alignment specification in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout13.ll b/llvm/test/Assembler/invalid-datalayout13.ll deleted file mode 100644 index 5ac719dbb7a9c0..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout13.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = ":32" -; CHECK: Expected token before separator in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout14.ll b/llvm/test/Assembler/invalid-datalayout14.ll deleted file mode 100644 index 84634b52a146ca..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout14.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "i64:64:16" -; CHECK: Preferred alignment cannot be less than the ABI alignment diff --git a/llvm/test/Assembler/invalid-datalayout15.ll 
b/llvm/test/Assembler/invalid-datalayout15.ll deleted file mode 100644 index ea240b73fd25f2..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout15.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "i64:16:16777216" -; CHECK: Invalid preferred alignment, must be a 16bit integer diff --git a/llvm/test/Assembler/invalid-datalayout16.ll b/llvm/test/Assembler/invalid-datalayout16.ll deleted file mode 100644 index 0dd1abb629b6fc..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout16.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "i64:16777216:16777216" -; CHECK: Invalid ABI alignment, must be a 16bit integer diff --git a/llvm/test/Assembler/invalid-datalayout17.ll b/llvm/test/Assembler/invalid-datalayout17.ll deleted file mode 100644 index b7eab74ad2a8ca..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout17.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "i16777216:16:16" -; CHECK: Invalid bit width, must be a 24-bit integer diff --git a/llvm/test/Assembler/invalid-datalayout18.ll b/llvm/test/Assembler/invalid-datalayout18.ll deleted file mode 100644 index b9956f98c9c6dc..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout18.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "p:32:32:16" -; CHECK: Preferred alignment cannot be less than the ABI alignment diff --git a/llvm/test/Assembler/invalid-datalayout19.ll b/llvm/test/Assembler/invalid-datalayout19.ll deleted file mode 100644 index fc0fc468520928..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout19.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "p:0:32:32" - -; CHECK: Invalid pointer size of 0 bytes - diff --git a/llvm/test/Assembler/invalid-datalayout2.ll b/llvm/test/Assembler/invalid-datalayout2.ll deleted file mode 100644 
index a435612bf85459..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout2.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "m:v" -; CHECK: Unknown mangling in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout20.ll b/llvm/test/Assembler/invalid-datalayout20.ll deleted file mode 100644 index a9ac1d7fe0983a..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout20.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "p:64:24:64" - -; CHECK: Pointer ABI alignment must be a power of 2 - diff --git a/llvm/test/Assembler/invalid-datalayout21.ll b/llvm/test/Assembler/invalid-datalayout21.ll deleted file mode 100644 index a39d1d7a14a86b..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout21.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "p:64:64:24" - -; CHECK: Pointer preferred alignment must be a power of 2 - diff --git a/llvm/test/Assembler/invalid-datalayout22.ll b/llvm/test/Assembler/invalid-datalayout22.ll deleted file mode 100644 index 14e4c2822ce4b0..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout22.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "v128:0:128" - -; CHECK: ABI alignment specification must be >0 for non-aggregate types - diff --git a/llvm/test/Assembler/invalid-datalayout23.ll b/llvm/test/Assembler/invalid-datalayout23.ll deleted file mode 100644 index 430326327bc116..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout23.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "i32:24:32" - -; CHECK: Invalid ABI alignment, must be a power of 2 - diff --git a/llvm/test/Assembler/invalid-datalayout24.ll b/llvm/test/Assembler/invalid-datalayout24.ll deleted file mode 100644 index 616ec64518a5b9..00000000000000 --- 
a/llvm/test/Assembler/invalid-datalayout24.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s - -target datalayout = "i32:32:24" - -; CHECK: Invalid preferred alignment, must be a power of 2 - diff --git a/llvm/test/Assembler/invalid-datalayout3.ll b/llvm/test/Assembler/invalid-datalayout3.ll deleted file mode 100644 index 44535fd055b5ea..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout3.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "n0" -; CHECK: Zero width native integer type in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout4.ll b/llvm/test/Assembler/invalid-datalayout4.ll deleted file mode 100644 index 99a6a6093954e1..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout4.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "p16777216:64:64:64" -; CHECK: Invalid address space, must be a 24-bit integer diff --git a/llvm/test/Assembler/invalid-datalayout5.ll b/llvm/test/Assembler/invalid-datalayout5.ll deleted file mode 100644 index 3ce8791c0870b4..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout5.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "a1:64" -; CHECK: Sized aggregate specification in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout6.ll b/llvm/test/Assembler/invalid-datalayout6.ll deleted file mode 100644 index 425099f7cad869..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout6.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "a:" -; CHECK: Trailing separator in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout7.ll b/llvm/test/Assembler/invalid-datalayout7.ll deleted file mode 100644 index 5e010710889f6d..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout7.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | 
FileCheck %s -target datalayout = "p:48:52" -; CHECK: number of bits must be a byte width multiple diff --git a/llvm/test/Assembler/invalid-datalayout8.ll b/llvm/test/Assembler/invalid-datalayout8.ll deleted file mode 100644 index 28832ffb17dd05..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout8.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "e-p" -; CHECK: Missing size specification for pointer in datalayout string diff --git a/llvm/test/Assembler/invalid-datalayout9.ll b/llvm/test/Assembler/invalid-datalayout9.ll deleted file mode 100644 index dfeac65cf604d1..00000000000000 --- a/llvm/test/Assembler/invalid-datalayout9.ll +++ /dev/null @@ -1,3 +0,0 @@ -; RUN: not llvm-as < %s 2>&1 | FileCheck %s -target datalayout = "e-p:64" -; CHECK: Missing alignment specification for pointer in datalayout string diff --git a/llvm/unittests/IR/DataLayoutTest.cpp b/llvm/unittests/IR/DataLayoutTest.cpp index 113bb578f6bc3b..dcb2e614f4c40d 100644 --- a/llvm/unittests/IR/DataLayoutTest.cpp +++ b/llvm/unittests/IR/DataLayoutTest.cpp @@ -19,6 +19,111 @@ using namespace llvm; namespace { +// TODO: Split into multiple TESTs. 
+TEST(DataLayoutTest, ParseErrors) { + EXPECT_THAT_EXPECTED( + DataLayout::parse("^"), + FailedWithMessage("Unknown specifier in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("m:v"), + FailedWithMessage("Unknown mangling in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("n0"), + FailedWithMessage("Zero width native integer type in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p16777216:64:64:64"), + FailedWithMessage("Invalid address space, must be a 24-bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("a1:64"), + FailedWithMessage("Sized aggregate specification in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("a:"), + FailedWithMessage("Trailing separator in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p:48:52"), + FailedWithMessage("number of bits must be a byte width multiple")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("e-p"), + FailedWithMessage( + "Missing size specification for pointer in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("e-p:64"), + FailedWithMessage( + "Missing alignment specification for pointer in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("m"), + FailedWithMessage("Expected mangling specifier in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("m."), + FailedWithMessage("Unexpected trailing characters after mangling " + "specifier in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("f"), + FailedWithMessage( + "Missing alignment specification in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse(":32"), + FailedWithMessage( + "Expected token before separator in datalayout string")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i64:64:16"), + FailedWithMessage( + "Preferred alignment cannot be less than the ABI alignment")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i64:16:16777216"), + FailedWithMessage( + "Invalid preferred 
alignment, must be a 16bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i64:16777216:16777216"), + FailedWithMessage("Invalid ABI alignment, must be a 16bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i16777216:16:16"), + FailedWithMessage("Invalid bit width, must be a 24-bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p:32:32:16"), + FailedWithMessage( + "Preferred alignment cannot be less than the ABI alignment")); + EXPECT_THAT_EXPECTED(DataLayout::parse("p:0:32:32"), + FailedWithMessage("Invalid pointer size of 0 bytes")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p:64:24:64"), + FailedWithMessage("Pointer ABI alignment must be a power of 2")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p:64:64:24"), + FailedWithMessage("Pointer preferred alignment must be a power of 2")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("p:64:64:64:128"), + FailedWithMessage("Index width cannot be larger than pointer width")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("v128:0:128"), + FailedWithMessage( + "ABI alignment specification must be >0 for non-aggregate types")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i32:24:32"), + FailedWithMessage("Invalid ABI alignment, must be a power of 2")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i32:32:24"), + FailedWithMessage("Invalid preferred alignment, must be a power of 2")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("A16777216"), + FailedWithMessage("Invalid address space, must be a 24-bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("G16777216"), + FailedWithMessage("Invalid address space, must be a 24-bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("P16777216"), + FailedWithMessage("Invalid address space, must be a 24-bit integer")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("Fi24"), + FailedWithMessage("Alignment is neither 0 nor a power of 2")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("i8:16"), + FailedWithMessage("Invalid ABI alignment, i8 must be 
naturally aligned")); + EXPECT_THAT_EXPECTED( + DataLayout::parse("S24"), + FailedWithMessage("Alignment is neither 0 nor a power of 2")); +} + TEST(DataLayoutTest, CopyAssignmentInvalidatesStructLayout) { DataLayout DL1 = cantFail(DataLayout::parse("p:32:32")); DataLayout DL2 = cantFail(DataLayout::parse("p:64:64")); From 4a00f1aab25353ca51b5d8e2b081cc66305b3cd8 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 15 Aug 2024 10:02:41 +0200 Subject: [PATCH 35/47] [mlir][test] XFAIL little-endian-only tests on SPARC (#103726) 3 MLIR tests `FAIL` on SPARC, both Solaris/sparcv9 and Linux/sparc64: ``` MLIR :: Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir MLIR :: IR/elements-attr-interface.mlir MLIR :: Target/LLVMIR/llvmir-le-specific.mlir ``` The issue is always the same: the tests in question are little-endian-only currently, so this patch `XFAIL`s them on `sparc*` as is already done for `s390x`. Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`, `amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`. 
--- .../Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir | 2 +- mlir/test/IR/elements-attr-interface.mlir | 2 +- mlir/test/Target/LLVMIR/llvmir-le-specific.mlir | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir index 7233a8bfffa9db..47be1be30577d8 100644 --- a/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir +++ b/mlir/test/Conversion/ArithToSPIRV/arith-to-spirv-le-specific.mlir @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: target=s390x-{{.*}} +// XFAIL: target={{(s390x|sparc.*)-.*}} module attributes { spirv.target_env = #spirv.target_env< diff --git a/mlir/test/IR/elements-attr-interface.mlir b/mlir/test/IR/elements-attr-interface.mlir index 5234c81bd841e3..79283f1aae99a8 100644 --- a/mlir/test/IR/elements-attr-interface.mlir +++ b/mlir/test/IR/elements-attr-interface.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -test-elements-attr-interface -verify-diagnostics // Parsing external resources does not work on big-endian platforms currently -// XFAIL: target=s390x-{{.*}} +// XFAIL: target={{(s390x|sparc.*)-.*}} // This test contains various `ElementsAttr` attributes, and tests the support // for iterating the values of these attributes using various native C++ types. 
diff --git a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir index f8d082082117cb..98145bc35cba77 100644 --- a/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-le-specific.mlir @@ -1,7 +1,7 @@ // RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s // Decoding the attribute does not work on big-endian platforms currently -// XFAIL: target=s390x-{{.*}} +// XFAIL: target={{(s390x|sparc.*)-.*}} // CHECK{LITERAL}: @dense_resource_tensor_constant = internal constant [5 x float] [float 0x3FCA034080000000, float 0xBFD0466300000000, float 0xBFD75DDF80000000, float 0xBFDE074F40000000, float 0x3FDDD3A1C0000000] llvm.mlir.global internal constant @dense_resource_tensor_constant(dense_resource : tensor<5xf32>) : !llvm.array<5 x f32> @@ -24,4 +24,4 @@ llvm.mlir.global internal constant @dense_resource_multidim_vector_constant(dens dense_resource_test_2x2xf32: "0x0800000054A3B53ED6C0B33E55D1A2BDE5D2BB3E" } } -#-} \ No newline at end of file +#-} From cf2e10150a5a83cece4fb8935202f0d67307b5c8 Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 15 Aug 2024 10:04:49 +0200 Subject: [PATCH 36/47] [flang][test] Fix Lower/default-initialization-globals.f90 on SPARC (#103722) `Flang :: Lower/default-initialization-globals.f90` `FAIL`s on SPARC, both Solaris/sparcv9 and Linux/sparc64. The failure mode is same as on AIX/PowerPC, so both targets being big-endian, this patch treats them the same. Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`, `amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`. 
--- flang/test/Lower/default-initialization-globals.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/test/Lower/default-initialization-globals.f90 b/flang/test/Lower/default-initialization-globals.f90 index 384d1cb763ad67..e9611dab467cba 100644 --- a/flang/test/Lower/default-initialization-globals.f90 +++ b/flang/test/Lower/default-initialization-globals.f90 @@ -1,5 +1,5 @@ ! Test default initialization of global variables (static init) -! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if system-aix %{"CHECK","CHECK-BE"%} \ +! RUN: bbc -hlfir=false %s -o - | FileCheck %s --check-prefixes=%if target={{.*-aix.*|sparc.*}} %{"CHECK","CHECK-BE"%} \ ! RUN: %else %{"CHECK","CHECK-LE"%} module tinit From e1e47acafb81e583e5cf7b3b6d609f4b5726cc67 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 15 Aug 2024 11:06:47 +0300 Subject: [PATCH 37/47] [DataLayout] Move '*AlignElem' structs and enum inside DataLayout (NFC) (#103723) This makes `LayoutAlignElem` / `PointerAlignElem` and `AlignTypeEnum` inner types of `DataLayout`. The types are also renamed to match their meaning (LangRef refers to them as "specification" and "specifier"). Pull Request: https://github.com/llvm/llvm-project/pull/103723 --- llvm/include/llvm/IR/DataLayout.h | 107 ++++++-------- llvm/lib/IR/DataLayout.cpp | 235 +++++++++++++----------------- 2 files changed, 152 insertions(+), 190 deletions(-) diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h index 795cd05ea5b5e2..1185939cd9c75b 100644 --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -49,51 +49,11 @@ class StructLayout; class Triple; class Value; -/// Enum used to categorize the alignment types stored by LayoutAlignElem -enum AlignTypeEnum { - INTEGER_ALIGN = 'i', - VECTOR_ALIGN = 'v', - FLOAT_ALIGN = 'f', - AGGREGATE_ALIGN = 'a' -}; - // FIXME: Currently the DataLayout string carries a "preferred alignment" // for types. 
As the DataLayout is module/global, this should likely be // sunk down to an FTTI element that is queried rather than a global // preference. -/// Layout alignment element. -/// -/// Stores the alignment data associated with a given type bit width. -struct LayoutAlignElem { - uint32_t TypeBitWidth; - Align ABIAlign; - Align PrefAlign; - - static LayoutAlignElem get(Align ABIAlign, Align PrefAlign, - uint32_t BitWidth); - - bool operator==(const LayoutAlignElem &rhs) const; -}; - -/// Layout pointer alignment element. -/// -/// Stores the alignment data associated with a given pointer and address space. -struct PointerAlignElem { - uint32_t AddressSpace; - uint32_t TypeBitWidth; - Align ABIAlign; - Align PrefAlign; - uint32_t IndexBitWidth; - - /// Initializer - static PointerAlignElem getInBits(uint32_t AddressSpace, Align ABIAlign, - Align PrefAlign, uint32_t TypeBitWidth, - uint32_t IndexBitWidth); - - bool operator==(const PointerAlignElem &rhs) const; -}; - /// A parsed version of the target data layout string in and methods for /// querying it. /// @@ -102,6 +62,26 @@ struct PointerAlignElem { /// target being codegen'd to. class DataLayout { public: + /// Primitive type specification. + struct PrimitiveSpec { + uint32_t BitWidth; + Align ABIAlign; + Align PrefAlign; + + bool operator==(const PrimitiveSpec &Other) const; + }; + + /// Pointer type specification. + struct PointerSpec { + uint32_t AddrSpace; + uint32_t BitWidth; + Align ABIAlign; + Align PrefAlign; + uint32_t IndexBitWidth; + + bool operator==(const PointerSpec &Other) const; + }; + enum class FunctionPtrAlignType { /// The function pointer alignment is independent of the function alignment. Independent, @@ -135,20 +115,26 @@ class DataLayout { // FIXME: `unsigned char` truncates the value parsed by `parseSpecifier`. SmallVector LegalIntWidths; - // Primitive type specifications. Sorted and uniqued by type bit width. 
- SmallVector IntAlignments; - SmallVector FloatAlignments; - SmallVector VectorAlignments; + /// Type specifier used by some internal functions. + enum class TypeSpecifier { + Integer = 'i', + Float = 'f', + Vector = 'v', + Aggregate = 'a' + }; - // Pointer type specifications. Sorted and uniqued by address space number. - SmallVector Pointers; + /// Primitive type specifications. Sorted and uniqued by type bit width. + SmallVector IntSpecs; + SmallVector FloatSpecs; + SmallVector VectorSpecs; + + /// Pointer type specifications. Sorted and uniqued by address space number. + SmallVector PointerSpecs; /// The string representation used to create this DataLayout std::string StringRepresentation; - const PointerAlignElem &getPointerAlignElem(uint32_t AddressSpace) const; - - // Struct type ABI and preferred alignments. The default spec is "a:8:64". + /// Struct type ABI and preferred alignments. The default spec is "a:8:64". Align StructABIAlignment = Align::Constant<1>(); Align StructPrefAlignment = Align::Constant<8>(); @@ -159,16 +145,19 @@ class DataLayout { /// well-defined bitwise representation. SmallVector NonIntegralAddressSpaces; - /// Attempts to set the alignment of the given type. Returns an error - /// description on failure. - Error setAlignment(AlignTypeEnum AlignType, Align ABIAlign, Align PrefAlign, - uint32_t BitWidth); + /// Attempts to set the specification for the given type. + /// Returns an error description on failure. + Error setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth, + Align ABIAlign, Align PrefAlign); + + /// Searches for a pointer specification that matches the given address space. + /// Returns the default address space specification if not found. + const PointerSpec &getPointerSpec(uint32_t AddrSpace) const; - /// Attempts to set the alignment of a pointer in the given address space. + /// Attempts to set the specification for pointer in the given address space. /// Returns an error description on failure. 
- Error setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign, - Align PrefAlign, uint32_t TypeBitWidth, - uint32_t IndexBitWidth); + Error setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, Align ABIAlign, + Align PrefAlign, uint32_t IndexBitWidth); /// Internal helper to get alignment for integer of given bitwidth. Align getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const; @@ -375,7 +364,7 @@ class DataLayout { /// FIXME: The defaults need to be removed once all of /// the backends/clients are updated. unsigned getPointerSizeInBits(unsigned AS = 0) const { - return getPointerAlignElem(AS).TypeBitWidth; + return getPointerSpec(AS).BitWidth; } /// Returns the maximum index size over all address spaces. @@ -385,7 +374,7 @@ class DataLayout { /// Size in bits of index used for address calculation in getelementptr. unsigned getIndexSizeInBits(unsigned AS) const { - return getPointerAlignElem(AS).IndexBitWidth; + return getPointerSpec(AS).IndexBitWidth; } /// Layout pointer size, in bits, based on the type. 
If this function is diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp index 530979c75063b4..44cd1e69818953 100644 --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -139,53 +139,20 @@ class StructLayoutMap { } // end anonymous namespace //===----------------------------------------------------------------------===// -// LayoutAlignElem, LayoutAlign support -//===----------------------------------------------------------------------===// - -LayoutAlignElem LayoutAlignElem::get(Align ABIAlign, Align PrefAlign, - uint32_t BitWidth) { - assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); - LayoutAlignElem retval; - retval.ABIAlign = ABIAlign; - retval.PrefAlign = PrefAlign; - retval.TypeBitWidth = BitWidth; - return retval; -} - -bool LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const { - return ABIAlign == rhs.ABIAlign && PrefAlign == rhs.PrefAlign && - TypeBitWidth == rhs.TypeBitWidth; -} - -//===----------------------------------------------------------------------===// -// PointerAlignElem, PointerAlign support +// DataLayout Class Implementation //===----------------------------------------------------------------------===// -PointerAlignElem PointerAlignElem::getInBits(uint32_t AddressSpace, - Align ABIAlign, Align PrefAlign, - uint32_t TypeBitWidth, - uint32_t IndexBitWidth) { - assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!"); - PointerAlignElem retval; - retval.AddressSpace = AddressSpace; - retval.ABIAlign = ABIAlign; - retval.PrefAlign = PrefAlign; - retval.TypeBitWidth = TypeBitWidth; - retval.IndexBitWidth = IndexBitWidth; - return retval; +bool DataLayout::PrimitiveSpec::operator==(const PrimitiveSpec &Other) const { + return BitWidth == Other.BitWidth && ABIAlign == Other.ABIAlign && + PrefAlign == Other.PrefAlign; } -bool -PointerAlignElem::operator==(const PointerAlignElem &rhs) const { - return (ABIAlign == rhs.ABIAlign && AddressSpace == rhs.AddressSpace && - 
PrefAlign == rhs.PrefAlign && TypeBitWidth == rhs.TypeBitWidth && - IndexBitWidth == rhs.IndexBitWidth); +bool DataLayout::PointerSpec::operator==(const PointerSpec &Other) const { + return AddrSpace == Other.AddrSpace && BitWidth == Other.BitWidth && + ABIAlign == Other.ABIAlign && PrefAlign == Other.PrefAlign && + IndexBitWidth == Other.IndexBitWidth; } -//===----------------------------------------------------------------------===// -// DataLayout Class Implementation -//===----------------------------------------------------------------------===// - const char *DataLayout::getManglingComponent(const Triple &T) { if (T.isOSBinFormatGOFF()) return "-m:l"; @@ -200,34 +167,34 @@ const char *DataLayout::getManglingComponent(const Triple &T) { // Default primitive type specifications. // NOTE: These arrays must be sorted by type bit width. -constexpr LayoutAlignElem DefaultIntSpecs[] = { +constexpr DataLayout::PrimitiveSpec DefaultIntSpecs[] = { {1, Align::Constant<1>(), Align::Constant<1>()}, // i1:8:8 {8, Align::Constant<1>(), Align::Constant<1>()}, // i8:8:8 {16, Align::Constant<2>(), Align::Constant<2>()}, // i16:16:16 {32, Align::Constant<4>(), Align::Constant<4>()}, // i32:32:32 {64, Align::Constant<4>(), Align::Constant<8>()}, // i64:32:64 }; -constexpr LayoutAlignElem DefaultFloatSpecs[] = { +constexpr DataLayout::PrimitiveSpec DefaultFloatSpecs[] = { {16, Align::Constant<2>(), Align::Constant<2>()}, // f16:16:16 {32, Align::Constant<4>(), Align::Constant<4>()}, // f32:32:32 {64, Align::Constant<8>(), Align::Constant<8>()}, // f64:64:64 {128, Align::Constant<16>(), Align::Constant<16>()}, // f128:128:128 }; -constexpr LayoutAlignElem DefaultVectorSpecs[] = { +constexpr DataLayout::PrimitiveSpec DefaultVectorSpecs[] = { {64, Align::Constant<8>(), Align::Constant<8>()}, // v64:64:64 {128, Align::Constant<16>(), Align::Constant<16>()}, // v128:128:128 }; // Default pointer type specifications. 
-constexpr PointerAlignElem DefaultPointerSpecs[] = { +constexpr DataLayout::PointerSpec DefaultPointerSpecs[] = { {0, 64, Align::Constant<8>(), Align::Constant<8>(), 64} // p0:64:64:64:64 }; DataLayout::DataLayout() - : IntAlignments(ArrayRef(DefaultIntSpecs)), - FloatAlignments(ArrayRef(DefaultFloatSpecs)), - VectorAlignments(ArrayRef(DefaultVectorSpecs)), - Pointers(ArrayRef(DefaultPointerSpecs)) {} + : IntSpecs(ArrayRef(DefaultIntSpecs)), + FloatSpecs(ArrayRef(DefaultFloatSpecs)), + VectorSpecs(ArrayRef(DefaultVectorSpecs)), + PointerSpecs(ArrayRef(DefaultPointerSpecs)) {} DataLayout::DataLayout(StringRef LayoutString) : DataLayout() { if (Error Err = parseSpecifier(LayoutString)) @@ -247,10 +214,10 @@ DataLayout &DataLayout::operator=(const DataLayout &Other) { TheFunctionPtrAlignType = Other.TheFunctionPtrAlignType; ManglingMode = Other.ManglingMode; LegalIntWidths = Other.LegalIntWidths; - IntAlignments = Other.IntAlignments; - FloatAlignments = Other.FloatAlignments; - VectorAlignments = Other.VectorAlignments; - Pointers = Other.Pointers; + IntSpecs = Other.IntSpecs; + FloatSpecs = Other.FloatSpecs; + VectorSpecs = Other.VectorSpecs; + PointerSpecs = Other.PointerSpecs; StructABIAlignment = Other.StructABIAlignment; StructPrefAlignment = Other.StructPrefAlignment; NonIntegralAddressSpaces = Other.NonIntegralAddressSpaces; @@ -268,11 +235,9 @@ bool DataLayout::operator==(const DataLayout &Other) const { FunctionPtrAlign == Other.FunctionPtrAlign && TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType && ManglingMode == Other.ManglingMode && - LegalIntWidths == Other.LegalIntWidths && - IntAlignments == Other.IntAlignments && - FloatAlignments == Other.FloatAlignments && - VectorAlignments == Other.VectorAlignments && - Pointers == Other.Pointers && + LegalIntWidths == Other.LegalIntWidths && IntSpecs == Other.IntSpecs && + FloatSpecs == Other.FloatSpecs && VectorSpecs == Other.VectorSpecs && + PointerSpecs == Other.PointerSpecs && StructABIAlignment == 
Other.StructABIAlignment && StructPrefAlignment == Other.StructPrefAlignment; } @@ -361,10 +326,10 @@ Error DataLayout::parseSpecifier(StringRef Desc) { continue; } - char Specifier = Tok.front(); + char SpecifierChar = Tok.front(); Tok = Tok.substr(1); - switch (Specifier) { + switch (SpecifierChar) { case 's': // Deprecated, but ignoring here to preserve loading older textual llvm // ASM file @@ -433,9 +398,9 @@ Error DataLayout::parseSpecifier(StringRef Desc) { return reportError("Invalid index size of 0 bytes"); } } - if (Error Err = setPointerAlignmentInBits( - AddrSpace, assumeAligned(PointerABIAlign), - assumeAligned(PointerPrefAlign), PointerMemSize, IndexSize)) + if (Error Err = setPointerSpec( + AddrSpace, PointerMemSize, assumeAligned(PointerABIAlign), + assumeAligned(PointerPrefAlign), IndexSize)) return Err; break; } @@ -443,13 +408,22 @@ Error DataLayout::parseSpecifier(StringRef Desc) { case 'v': case 'f': case 'a': { - AlignTypeEnum AlignType; - switch (Specifier) { - default: llvm_unreachable("Unexpected specifier!"); - case 'i': AlignType = INTEGER_ALIGN; break; - case 'v': AlignType = VECTOR_ALIGN; break; - case 'f': AlignType = FLOAT_ALIGN; break; - case 'a': AlignType = AGGREGATE_ALIGN; break; + TypeSpecifier Specifier; + switch (SpecifierChar) { + default: + llvm_unreachable("Unexpected specifier!"); + case 'i': + Specifier = TypeSpecifier::Integer; + break; + case 'v': + Specifier = TypeSpecifier::Vector; + break; + case 'f': + Specifier = TypeSpecifier::Float; + break; + case 'a': + Specifier = TypeSpecifier::Aggregate; + break; } // Bit size. 
@@ -458,7 +432,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) { if (Error Err = getInt(Tok, Size)) return Err; - if (AlignType == AGGREGATE_ALIGN && Size != 0) + if (Specifier == TypeSpecifier::Aggregate && Size != 0) return reportError( "Sized aggregate specification in datalayout string"); @@ -471,7 +445,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) { unsigned ABIAlign; if (Error Err = getIntInBytes(Tok, ABIAlign)) return Err; - if (AlignType != AGGREGATE_ALIGN && !ABIAlign) + if (Specifier != TypeSpecifier::Aggregate && !ABIAlign) return reportError( "ABI alignment specification must be >0 for non-aggregate types"); @@ -479,7 +453,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) { return reportError("Invalid ABI alignment, must be a 16bit integer"); if (ABIAlign != 0 && !isPowerOf2_64(ABIAlign)) return reportError("Invalid ABI alignment, must be a power of 2"); - if (AlignType == INTEGER_ALIGN && Size == 8 && ABIAlign != 1) + if (Specifier == TypeSpecifier::Integer && Size == 8 && ABIAlign != 1) return reportError( "Invalid ABI alignment, i8 must be naturally aligned"); @@ -498,8 +472,8 @@ Error DataLayout::parseSpecifier(StringRef Desc) { if (PrefAlign != 0 && !isPowerOf2_64(PrefAlign)) return reportError("Invalid preferred alignment, must be a power of 2"); - if (Error Err = setAlignment(AlignType, assumeAligned(ABIAlign), - assumeAligned(PrefAlign), Size)) + if (Error Err = setPrimitiveSpec(Specifier, Size, assumeAligned(ABIAlign), + assumeAligned(PrefAlign))) return Err; break; @@ -607,16 +581,17 @@ Error DataLayout::parseSpecifier(StringRef Desc) { return Error::success(); } -static SmallVectorImpl::const_iterator -findAlignmentLowerBound(const SmallVectorImpl &Alignments, - uint32_t BitWidth) { - return partition_point(Alignments, [BitWidth](const LayoutAlignElem &E) { - return E.TypeBitWidth < BitWidth; +static SmallVectorImpl::const_iterator +findPrimitiveSpecLowerBound( + const SmallVectorImpl &Specs, + uint32_t BitWidth) { + return 
partition_point(Specs, [BitWidth](const DataLayout::PrimitiveSpec &E) { + return E.BitWidth < BitWidth; }); } -Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign, - Align PrefAlign, uint32_t BitWidth) { +Error DataLayout::setPrimitiveSpec(TypeSpecifier Specifier, uint32_t BitWidth, + Align ABIAlign, Align PrefAlign) { // AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as // uint16_t, it is unclear if there are requirements for alignment to be less // than 2^16 other than storage. In the meantime we leave the restriction as @@ -628,74 +603,72 @@ Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign, return reportError( "Preferred alignment cannot be less than the ABI alignment"); - SmallVectorImpl *Alignments; - switch (AlignType) { - case AGGREGATE_ALIGN: + SmallVectorImpl *Specs; + switch (Specifier) { + case TypeSpecifier::Aggregate: StructABIAlignment = ABIAlign; StructPrefAlignment = PrefAlign; return Error::success(); - case INTEGER_ALIGN: - Alignments = &IntAlignments; + case TypeSpecifier::Integer: + Specs = &IntSpecs; break; - case FLOAT_ALIGN: - Alignments = &FloatAlignments; + case TypeSpecifier::Float: + Specs = &FloatSpecs; break; - case VECTOR_ALIGN: - Alignments = &VectorAlignments; + case TypeSpecifier::Vector: + Specs = &VectorSpecs; break; } - auto I = partition_point(*Alignments, [BitWidth](const LayoutAlignElem &E) { - return E.TypeBitWidth < BitWidth; + auto I = partition_point(*Specs, [BitWidth](const PrimitiveSpec &E) { + return E.BitWidth < BitWidth; }); - if (I != Alignments->end() && I->TypeBitWidth == BitWidth) { + if (I != Specs->end() && I->BitWidth == BitWidth) { // Update the abi, preferred alignments. I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; } else { // Insert before I to keep the vector sorted. 
- Alignments->insert(I, LayoutAlignElem::get(ABIAlign, PrefAlign, BitWidth)); + Specs->insert(I, PrimitiveSpec{BitWidth, ABIAlign, PrefAlign}); } return Error::success(); } -const PointerAlignElem & -DataLayout::getPointerAlignElem(uint32_t AddressSpace) const { - if (AddressSpace != 0) { - auto I = lower_bound(Pointers, AddressSpace, - [](const PointerAlignElem &A, uint32_t AddressSpace) { - return A.AddressSpace < AddressSpace; - }); - if (I != Pointers.end() && I->AddressSpace == AddressSpace) +const DataLayout::PointerSpec & +DataLayout::getPointerSpec(uint32_t AddrSpace) const { + if (AddrSpace != 0) { + auto I = lower_bound(PointerSpecs, AddrSpace, + [](const PointerSpec &Spec, uint32_t AddrSpace) { + return Spec.AddrSpace < AddrSpace; + }); + if (I != PointerSpecs.end() && I->AddrSpace == AddrSpace) return *I; } - assert(Pointers[0].AddressSpace == 0); - return Pointers[0]; + assert(PointerSpecs[0].AddrSpace == 0); + return PointerSpecs[0]; } -Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign, - Align PrefAlign, - uint32_t TypeBitWidth, - uint32_t IndexBitWidth) { +Error DataLayout::setPointerSpec(uint32_t AddrSpace, uint32_t BitWidth, + Align ABIAlign, Align PrefAlign, + uint32_t IndexBitWidth) { if (PrefAlign < ABIAlign) return reportError( "Preferred alignment cannot be less than the ABI alignment"); - if (IndexBitWidth > TypeBitWidth) + if (IndexBitWidth > BitWidth) return reportError("Index width cannot be larger than pointer width"); - auto I = lower_bound(Pointers, AddrSpace, - [](const PointerAlignElem &A, uint32_t AddressSpace) { - return A.AddressSpace < AddressSpace; - }); - if (I == Pointers.end() || I->AddressSpace != AddrSpace) { - Pointers.insert(I, - PointerAlignElem::getInBits(AddrSpace, ABIAlign, PrefAlign, - TypeBitWidth, IndexBitWidth)); + auto I = lower_bound(PointerSpecs, AddrSpace, + [](const PointerSpec &A, uint32_t AddrSpace) { + return A.AddrSpace < AddrSpace; + }); + if (I == PointerSpecs.end() || 
I->AddrSpace != AddrSpace) { + PointerSpecs.insert(I, PointerSpec{AddrSpace, BitWidth, ABIAlign, PrefAlign, + IndexBitWidth}); } else { + I->BitWidth = BitWidth; I->ABIAlign = ABIAlign; I->PrefAlign = PrefAlign; - I->TypeBitWidth = TypeBitWidth; I->IndexBitWidth = IndexBitWidth; } return Error::success(); @@ -703,11 +676,11 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign, Align DataLayout::getIntegerAlignment(uint32_t BitWidth, bool abi_or_pref) const { - auto I = findAlignmentLowerBound(IntAlignments, BitWidth); + auto I = findPrimitiveSpecLowerBound(IntSpecs, BitWidth); // If we don't have an exact match, use alignment of next larger integer // type. If there is none, use alignment of largest integer type by going // back one element. - if (I == IntAlignments.end()) + if (I == IntSpecs.end()) --I; return abi_or_pref ? I->ABIAlign : I->PrefAlign; } @@ -737,22 +710,22 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const { } Align DataLayout::getPointerABIAlignment(unsigned AS) const { - return getPointerAlignElem(AS).ABIAlign; + return getPointerSpec(AS).ABIAlign; } Align DataLayout::getPointerPrefAlignment(unsigned AS) const { - return getPointerAlignElem(AS).PrefAlign; + return getPointerSpec(AS).PrefAlign; } unsigned DataLayout::getPointerSize(unsigned AS) const { - return divideCeil(getPointerAlignElem(AS).TypeBitWidth, 8); + return divideCeil(getPointerSpec(AS).BitWidth, 8); } unsigned DataLayout::getMaxIndexSize() const { unsigned MaxIndexSize = 0; - for (auto &P : Pointers) + for (const PointerSpec &Spec : PointerSpecs) MaxIndexSize = - std::max(MaxIndexSize, (unsigned)divideCeil(P.TypeBitWidth, 8)); + std::max(MaxIndexSize, (unsigned)divideCeil(Spec.BitWidth, 8)); return MaxIndexSize; } @@ -765,7 +738,7 @@ unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { } unsigned DataLayout::getIndexSize(unsigned AS) const { - return divideCeil(getPointerAlignElem(AS).IndexBitWidth, 8); + return 
divideCeil(getPointerSpec(AS).IndexBitWidth, 8); } unsigned DataLayout::getIndexTypeSizeInBits(Type *Ty) const { @@ -819,8 +792,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::FP128TyID: case Type::X86_FP80TyID: { unsigned BitWidth = getTypeSizeInBits(Ty).getFixedValue(); - auto I = findAlignmentLowerBound(FloatAlignments, BitWidth); - if (I != FloatAlignments.end() && I->TypeBitWidth == BitWidth) + auto I = findPrimitiveSpecLowerBound(FloatSpecs, BitWidth); + if (I != FloatSpecs.end() && I->BitWidth == BitWidth) return abi_or_pref ? I->ABIAlign : I->PrefAlign; // If we still couldn't find a reasonable default alignment, fall back @@ -834,8 +807,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const { case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue(); - auto I = findAlignmentLowerBound(VectorAlignments, BitWidth); - if (I != VectorAlignments.end() && I->TypeBitWidth == BitWidth) + auto I = findPrimitiveSpecLowerBound(VectorSpecs, BitWidth); + if (I != VectorSpecs.end() && I->BitWidth == BitWidth) return abi_or_pref ? I->ABIAlign : I->PrefAlign; // By default, use natural alignment for vector types. This is consistent From 3d06de544b8397a6b93a4fdb52650579237b77fa Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 15 Aug 2024 09:07:02 +0100 Subject: [PATCH 38/47] [lldb] Remove Phabricator usernames from Code Owners file (#102590) Removing them simplifies the content and means we don't confuse anyone who joined after the Phabricator shutdown. You could use them for review archaeology but this is only a subset of the names you'd encounter there anyway. So I don't think this is a good reason to keep them here. With a couple of exceptions the Phabricator/GitHub names are the same and/or related to their full name anyway. 
--- lldb/CodeOwners.rst | 96 ++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/lldb/CodeOwners.rst b/lldb/CodeOwners.rst index 52e3e550523e5b..3c10c2a28da9e7 100644 --- a/lldb/CodeOwners.rst +++ b/lldb/CodeOwners.rst @@ -17,7 +17,7 @@ assistance. All parts of LLDB not covered by someone else ---------------------------------------------- | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) Components ---------- @@ -27,100 +27,100 @@ LLDB. ABI ~~~ | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) | David Spickett -| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) +| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) Breakpoint ~~~~~~~~~~ | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) CMake & Build System ~~~~~~~~~~~~~~~~~~~~ | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) | Alex Langford -| alangford\@apple.com (email), bulbazord (Phabricator), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord) +| alangford\@apple.com (email), bulbazord (GitHub), bulbazord (Discourse), bulba_zord (Discord) Commands 
~~~~~~~~ | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) Expression Parser ~~~~~~~~~~~~~~~~~ | Michael Buch -| michaelbuch12\@gmail.com (email), Michael137 (Phabricator), Michael137 (GitHub), Michael137 (Discourse) +| michaelbuch12\@gmail.com (email), Michael137 (GitHub), Michael137 (Discourse) | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) Interpreter ~~~~~~~~~~~ | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) | Greg Clayton -| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse) +| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse) Lua ~~~ | Jonas Delvieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) Python ~~~~~~ | Med Ismail Bennani -| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord) +| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord) Target/Process Control ~~~~~~~~~~~~~~~~~~~~~~ | Med Ismail Bennani -| ismail\@bennani.ma (email), mib (Phabricator), medismailben (GitHub), mib (Discourse), mib#8727 (Discord) +| ismail\@bennani.ma (email), medismailben (GitHub), mib (Discourse), mib#8727 (Discord) | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) Test Suite ~~~~~~~~~~ | Jonas Devlieghere -| jonas\@devlieghere.com 
(email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) Trace ~~~~~ | Walter Erquinigo -| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord) +| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord) Unwinding ~~~~~~~~~ | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) Utility ~~~~~~~ | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) ValueObject ~~~~~~~~~~~ | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) Watchpoints ~~~~~~~~~~~ | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) File Formats ------------ @@ -130,54 +130,54 @@ info formats. 
(PE)COFF ~~~~~~~~ | Saleem Abdulrasool -| compnerd\@compnerd.org (email), compnerd (Phabricator), compnerd (GitHub), compnerd (Discourse), compnerd (Discord) +| compnerd\@compnerd.org (email), compnerd (GitHub), compnerd (Discourse), compnerd (Discord) Breakpad ~~~~~~~~ | Zequan Wu -| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse) +| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse) | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) CTF ~~~ | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) DWARF ~~~~~ | Adrian Prantl -| aprantl\@apple.com (email), aprantl (Phabricator), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse) +| aprantl\@apple.com (email), adrian-prantl (GitHub), adrian.prantl (Discourse), adrian.prantl (Discord), Adrian Prantl#4366 (Discourse) | Greg Clayton -| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse) +| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse) ELF ~~~ | David Spickett -| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) +| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) JSON ~~~~ | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere 
(Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) MachO ~~~~~ | Greg Clayton -| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse) +| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse) | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) PDB ~~~ | Zequan Wu -| zequanwu\@google.com (email), zequanwu (Phabricator), ZequanWu (GitHub), ZequanWu (Discourse) +| zequanwu\@google.com (email), ZequanWu (GitHub), ZequanWu (Discourse) Platforms --------- @@ -186,36 +186,36 @@ The following people are responsible for decisions involving platforms. Android ~~~~~~~ | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) Darwin ~~~~~~ | Jim Ingham -| jingham\@apple.com (email), jingham (Phabricator), jimingham (GitHub), jingham (Discourse) +| jingham\@apple.com (email), jimingham (GitHub), jingham (Discourse) | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) | Jonas Devlieghere -| jonas\@devlieghere.com (email), jdevlieghere (Phabricator), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) +| jonas\@devlieghere.com (email), jdevlieghere (GitHub), jdevlieghere (Discourse), jdevlieghere (Discord) FreeBSD ~~~~~~~ | Ed Maste -| emaste\@freebsd.org (email), emaste (Phabricator), emaste (GitHub), emaste (Discourse), emaste (Discord) +| emaste\@freebsd.org (email), emaste (GitHub), emaste (Discourse), emaste (Discord) Linux ~~~~~ | Pavel Labath 
-| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) | David Spickett -| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) +| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) Windows ~~~~~~~ | Omair Javaid -| omair.javaid\@linaro.org (email), omjavaid (Phabricator), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord) +| omair.javaid\@linaro.org (email), omjavaid (GitHub), omjavaid (Discourse), omjavaid#9902 (Discord) Tools ----- @@ -224,23 +224,23 @@ The following people are responsible for decisions involving specific tools. debugserver ~~~~~~~~~~~ | Jason Molenda -| jmolenda\@apple.com (email), jasonmolenda (Phabricator), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) +| jmolenda\@apple.com (email), jasonmolenda (GitHub), jasonmolenda (Discourse), jasonmolenda (Discord) lldb-server ~~~~~~~~~~~ | David Spickett -| david.spickett\@linaro.org (email), DavidSpickett (Phabricator), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) +| david.spickett\@linaro.org (email), DavidSpickett (GitHub), DavidSpickett (Discourse), davidspickett (Discord) | Pavel Labath -| pavel\@labath.sk (email), labath (Phabricator), labath (GitHub), labath (Discourse) +| pavel\@labath.sk (email), labath (GitHub), labath (Discourse) lldb-dap ~~~~~~~~ | Greg Clayton -| gclayton\@fb.com (email), clayborg (Phabricator), clayborg (GitHub), clayborg (Discourse) +| gclayton\@fb.com (email), clayborg (GitHub), clayborg (Discourse) | Walter Erquinigo -| a20012251\@gmail.com (email), wallace (Phabricator), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord) +| a20012251\@gmail.com (email), walter-erquinigo (GitHub), wallace (Discourse), werquinigo (Discord) Former Code Owners 
================== From 6d9cae12bdfcaeb2be10866e20e8883242f02c6c Mon Sep 17 00:00:00 2001 From: Rainer Orth Date: Thu, 15 Aug 2024 10:07:28 +0200 Subject: [PATCH 39/47] [flang][test] Run Driver/fveclib-codegen.f90 for aarch64 and x86_64 (#103730) `Flang :: Driver/fveclib-codegen.f90` currently `FAIL`s on SPARC, both Solaris/sparcv9 and Linux/sparc64: ``` bin/flang-new -S -Ofast -fveclib=LIBMVEC -o - /vol/llvm/src/llvm-project/local/flang/test/Driver/fveclib-codegen.f90 flang/test/Driver/fveclib-codegen.f90:11:10: error: CHECK: expected string not found in input ! CHECK: _ZGVbN4vv_powf ^ ``` The code in question only contains calls to `powf`. Given that `glibc` only supports `libmvec` on `aarch64` and `x86_64`, this test targets only those if possible. Tested on `sparcv9-sun-solaris2.11`, `sparc64-unknown-linux-gnu`, `amd64-pc-solaris2.11`, and `x86_64-pc-linux-gnu`. --- flang/test/Driver/fveclib-codegen.f90 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/test/Driver/fveclib-codegen.f90 b/flang/test/Driver/fveclib-codegen.f90 index 8d7d3af1e8f9ba..3a96c29ac70854 100644 --- a/flang/test/Driver/fveclib-codegen.f90 +++ b/flang/test/Driver/fveclib-codegen.f90 @@ -1,6 +1,7 @@ ! test that -fveclib= is passed to the backend ! -target aarch64 so that ArmPL is available -! RUN: %flang -S -Ofast -fveclib=LIBMVEC -o - %s | FileCheck %s +! RUN: %if aarch64-registered-target %{ %flang -S -Ofast -target aarch64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %} +! RUN: %if x86-registered-target %{ %flang -S -Ofast -target x86_64-unknown-linux-gnu -fveclib=LIBMVEC -o - %s | FileCheck %s %} ! 
RUN: %flang -S -Ofast -fveclib=NoLibrary -o - %s | FileCheck %s --check-prefix=NOLIB subroutine sb(a, b) From 6f6422f4a2b8647a59936c131e50a79906d89510 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 15 Aug 2024 08:14:13 +0000 Subject: [PATCH 40/47] [lldb][test] Mark sys_info zdump test unsupported on 32 bit Arm Linux Until https://github.com/llvm/llvm-project/pull/103056 lands or another more appropriate check can be found. This test fails on Ubuntu Focal where zdump is built with 32 bit time_t but passes on Ubuntu Jammy where zdump is built with 64 bit time_t. Marking it unsupported means Linaro can upgrade its bots to Ubuntu Jammy without getting an unexpected pass. --- .../time.zone.members/sys_info.zdump.pass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp index 207f8e4df45413..2b97d9a5bc745b 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp @@ -14,7 +14,7 @@ // XFAIL: availability-tzdb-missing // TODO TZDB Investigate -// XFAIL: target={{armv(7|8)l-linux-gnueabihf}} +// UNSUPPORTED: target={{armv(7|8)l-linux-gnueabihf}} #include #include From 141536544f4ec1d1bf24256157f4ff1a3bc07dae Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Thu, 15 Aug 2024 10:30:44 +0200 Subject: [PATCH 41/47] [MLIR][LLVM]: Add an IR utility to perform slice walking (#103053) This commit introduces a slicing utility that can be used to walk arbitrary IR slices. It additionally ships logic to determine control flow predecessors, which allows users to walk backward slices without dealing with both `RegionBranchOpInterface` and `BranchOpInterface`. 
This utility is used to improve the `noalias` propagation in the LLVM dialect's inliner interface. Before this change, it broke down as soon as pointers were passed through region control flow operations. --- mlir/include/mlir/Analysis/SliceWalk.h | 98 ++++++++++++ mlir/lib/Analysis/CMakeLists.txt | 1 + mlir/lib/Analysis/SliceWalk.cpp | 139 ++++++++++++++++++ .../Transforms/InlinerInterfaceImpl.cpp | 117 ++++++--------- .../Dialect/LLVMIR/inlining-alias-scopes.mlir | 54 +++++++ 5 files changed, 333 insertions(+), 76 deletions(-) create mode 100644 mlir/include/mlir/Analysis/SliceWalk.h create mode 100644 mlir/lib/Analysis/SliceWalk.cpp diff --git a/mlir/include/mlir/Analysis/SliceWalk.h b/mlir/include/mlir/Analysis/SliceWalk.h new file mode 100644 index 00000000000000..481c5690c533ba --- /dev/null +++ b/mlir/include/mlir/Analysis/SliceWalk.h @@ -0,0 +1,98 @@ +//===- SliceWalk.h - Helpers for performing IR slice walks ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_SLICEWALK_H +#define MLIR_ANALYSIS_SLICEWALK_H + +#include "mlir/IR/ValueRange.h" + +namespace mlir { + +/// A class to signal how to proceed with the walk of the backward slice: +/// - Interrupt: Stops the walk. +/// - AdvanceTo: Continues the walk to user-specified values. +/// - Skip: Continues the walk, but skips the predecessors of the current value. +class WalkContinuation { +public: + enum class WalkAction { + /// Stops the walk. + Interrupt, + /// Continues the walk to user-specified values. + AdvanceTo, + /// Continues the walk, but skips the predecessors of the current value.
+ Skip + }; + + WalkContinuation(WalkAction action, mlir::ValueRange nextValues) + : action(action), nextValues(nextValues) {} + + /// Allows diagnostics to interrupt the walk. + explicit WalkContinuation(mlir::Diagnostic &&) + : action(WalkAction::Interrupt) {} + + /// Allows diagnostics to interrupt the walk. + explicit WalkContinuation(mlir::InFlightDiagnostic &&) + : action(WalkAction::Interrupt) {} + + /// Creates a continuation that interrupts the walk. + static WalkContinuation interrupt() { + return WalkContinuation(WalkAction::Interrupt, {}); + } + + /// Creates a continuation that adds the user-specified `nextValues` to the + /// work list and advances the walk. + static WalkContinuation advanceTo(mlir::ValueRange nextValues) { + return WalkContinuation(WalkAction::AdvanceTo, nextValues); + } + + /// Creates a continuation that advances the walk without adding any + /// predecessor values to the work list. + static WalkContinuation skip() { + return WalkContinuation(WalkAction::Skip, {}); + } + + /// Returns true if the walk was interrupted. + bool wasInterrupted() const { return action == WalkAction::Interrupt; } + + /// Returns true if the walk was skipped. + bool wasSkipped() const { return action == WalkAction::Skip; } + + /// Returns true if the walk was advanced to user-specified values. + bool wasAdvancedTo() const { return action == WalkAction::AdvanceTo; } + + /// Returns the next values to continue the walk with. + mlir::ArrayRef getNextValues() const { return nextValues; } + +private: + WalkAction action; + /// The next values to continue the walk with. + mlir::SmallVector nextValues; +}; + +/// A callback that is invoked for each value encountered during the walk of the +/// slice. The callback takes the current value, and returns the walk +/// continuation, which determines if the walk should proceed and if yes, with +/// which values. 
+using WalkCallback = mlir::function_ref; + +/// Walks the slice starting from the `rootValues` using a depth-first +/// traversal. The walk calls the provided `walkCallback` for each value +/// encountered in the slice and uses the returned walk continuation to +/// determine how to proceed. +WalkContinuation walkSlice(mlir::ValueRange rootValues, + WalkCallback walkCallback); + +/// Computes a vector of all control predecessors of `value`. Relies on +/// RegionBranchOpInterface and BranchOpInterface to determine predecessors. +/// Returns nullopt if `value` has no predecessors or when the relevant +/// operations are missing the interface implementations. +std::optional> getControlFlowPredecessors(Value value); + +} // namespace mlir + +#endif // MLIR_ANALYSIS_SLICEWALK_H diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 38d8415d81c72d..609cb34309829e 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -29,6 +29,7 @@ add_mlir_library(MLIRAnalysis Liveness.cpp CFGLoopInfo.cpp SliceAnalysis.cpp + SliceWalk.cpp TopologicalSortUtils.cpp AliasAnalysis/LocalAliasAnalysis.cpp diff --git a/mlir/lib/Analysis/SliceWalk.cpp b/mlir/lib/Analysis/SliceWalk.cpp new file mode 100644 index 00000000000000..9d770639dc53ca --- /dev/null +++ b/mlir/lib/Analysis/SliceWalk.cpp @@ -0,0 +1,139 @@ +#include "mlir/Analysis/SliceWalk.h" +#include "mlir/Interfaces/ControlFlowInterfaces.h" + +using namespace mlir; + +WalkContinuation mlir::walkSlice(ValueRange rootValues, + WalkCallback walkCallback) { + // Search the backward slice starting from the root values. + SmallVector workList = rootValues; + llvm::SmallDenseSet seenValues; + while (!workList.empty()) { + // Search the backward slice of the current value. + Value current = workList.pop_back_val(); + + // Skip the current value if it has already been seen. + if (!seenValues.insert(current).second) + continue; + + // Call the walk callback with the current value. 
+ WalkContinuation continuation = walkCallback(current); + if (continuation.wasInterrupted()) + return continuation; + if (continuation.wasSkipped()) + continue; + + assert(continuation.wasAdvancedTo()); + // Add the next values to the work list if the walk should continue. + workList.append(continuation.getNextValues().begin(), + continuation.getNextValues().end()); + } + + return WalkContinuation::skip(); +} + +/// Returns the operands from all predecessor regions that match `operandNumber` +/// for the `successor` region within `regionOp`. +static SmallVector +getRegionPredecessorOperands(RegionBranchOpInterface regionOp, + RegionSuccessor successor, + unsigned operandNumber) { + SmallVector predecessorOperands; + + // Returns true if `successors` contains `successor`. + auto isContained = [](ArrayRef successors, + RegionSuccessor successor) { + auto *it = llvm::find_if(successors, [&successor](RegionSuccessor curr) { + return curr.getSuccessor() == successor.getSuccessor(); + }); + return it != successors.end(); + }; + + // Search the operand ranges on the region operation itself. + SmallVector operandAttributes(regionOp->getNumOperands()); + SmallVector successors; + regionOp.getEntrySuccessorRegions(operandAttributes, successors); + if (isContained(successors, successor)) { + OperandRange operands = regionOp.getEntrySuccessorOperands(successor); + predecessorOperands.push_back(operands[operandNumber]); + } + + // Search the operand ranges on region terminators. 
+ for (Region ®ion : regionOp->getRegions()) { + for (Block &block : region) { + auto terminatorOp = + dyn_cast(block.getTerminator()); + if (!terminatorOp) + continue; + SmallVector operandAttributes(terminatorOp->getNumOperands()); + SmallVector successors; + terminatorOp.getSuccessorRegions(operandAttributes, successors); + if (isContained(successors, successor)) { + OperandRange operands = terminatorOp.getSuccessorOperands(successor); + predecessorOperands.push_back(operands[operandNumber]); + } + } + } + + return predecessorOperands; +} + +/// Returns the predecessor branch operands that match `blockArg`, or nullopt if +/// some of the predecessor terminators do not implement the BranchOpInterface. +static std::optional> +getBlockPredecessorOperands(BlockArgument blockArg) { + Block *block = blockArg.getOwner(); + + // Search the predecessor operands for all predecessor terminators. + SmallVector predecessorOperands; + for (auto it = block->pred_begin(); it != block->pred_end(); ++it) { + Block *predecessor = *it; + auto branchOp = dyn_cast(predecessor->getTerminator()); + if (!branchOp) + return std::nullopt; + SuccessorOperands successorOperands = + branchOp.getSuccessorOperands(it.getSuccessorIndex()); + // Store the predecessor operand if the block argument matches an operand + // and is not produced by the terminator. + if (Value operand = successorOperands[blockArg.getArgNumber()]) + predecessorOperands.push_back(operand); + } + + return predecessorOperands; +} + +std::optional> +mlir::getControlFlowPredecessors(Value value) { + SmallVector result; + if (OpResult opResult = dyn_cast(value)) { + auto regionOp = dyn_cast(opResult.getOwner()); + // If the interface is not implemented, there are no control flow + // predecessors to work with. + if (!regionOp) + return std::nullopt; + // Add the control flow predecessor operands to the work list. 
+ RegionSuccessor region(regionOp->getResults()); + SmallVector predecessorOperands = getRegionPredecessorOperands( + regionOp, region, opResult.getResultNumber()); + return predecessorOperands; + } + + auto blockArg = cast(value); + Block *block = blockArg.getOwner(); + // Search the region predecessor operands for structured control flow. + if (block->isEntryBlock()) { + if (auto regionBranchOp = + dyn_cast(block->getParentOp())) { + RegionSuccessor region(blockArg.getParentRegion()); + SmallVector predecessorOperands = getRegionPredecessorOperands( + regionBranchOp, region, blockArg.getArgNumber()); + return predecessorOperands; + } + // If the interface is not implemented, there are no control flow + // predecessors to work with. + return std::nullopt; + } + + // Search the block predecessor operands for unstructured control flow. + return getBlockPredecessorOperands(blockArg); +} diff --git a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp index 8eba76a9abee8d..504f63b48c9433 100644 --- a/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp +++ b/mlir/lib/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/LLVMIR/Transforms/InlinerInterfaceImpl.h" +#include "mlir/Analysis/SliceWalk.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Matchers.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" @@ -221,86 +222,45 @@ static ArrayAttr concatArrayAttr(ArrayAttr lhs, ArrayAttr rhs) { return ArrayAttr::get(lhs.getContext(), result); } -/// Attempts to return the underlying pointer value that `pointerValue` is based -/// on. This traverses down the chain of operations to the last operation -/// producing the base pointer and returns it. If it encounters an operation it -/// cannot further traverse through, returns the operation's result. 
-static Value getUnderlyingObject(Value pointerValue) { - while (true) { - if (auto gepOp = pointerValue.getDefiningOp()) { - pointerValue = gepOp.getBase(); - continue; - } - - if (auto addrCast = pointerValue.getDefiningOp()) { - pointerValue = addrCast.getOperand(); - continue; - } - - break; - } - - return pointerValue; -} - /// Attempts to return the set of all underlying pointer values that /// `pointerValue` is based on. This function traverses through select -/// operations and block arguments unlike getUnderlyingObject. -static SmallVector getUnderlyingObjectSet(Value pointerValue) { +/// operations and block arguments. +static FailureOr> +getUnderlyingObjectSet(Value pointerValue) { SmallVector result; - - SmallVector workList{pointerValue}; - // Avoid dataflow loops. - SmallPtrSet seen; - do { - Value current = workList.pop_back_val(); - current = getUnderlyingObject(current); - - if (!seen.insert(current).second) - continue; - - if (auto selectOp = current.getDefiningOp()) { - workList.push_back(selectOp.getTrueValue()); - workList.push_back(selectOp.getFalseValue()); - continue; + WalkContinuation walkResult = walkSlice(pointerValue, [&](Value val) { + if (auto gepOp = val.getDefiningOp()) + return WalkContinuation::advanceTo(gepOp.getBase()); + + if (auto addrCast = val.getDefiningOp()) + return WalkContinuation::advanceTo(addrCast.getOperand()); + + // TODO: Add a SelectLikeOpInterface and use it in the slicing utility. + if (auto selectOp = val.getDefiningOp()) + return WalkContinuation::advanceTo( + {selectOp.getTrueValue(), selectOp.getFalseValue()}); + + // Attempt to advance to control flow predecessors. + std::optional> controlFlowPredecessors = + getControlFlowPredecessors(val); + if (controlFlowPredecessors) + return WalkContinuation::advanceTo(*controlFlowPredecessors); + + // For all non-control flow results, consider `val` an underlying object. 
+ if (isa(val)) { + result.push_back(val); + return WalkContinuation::skip(); } - if (auto blockArg = dyn_cast(current)) { - Block *parentBlock = blockArg.getParentBlock(); - - // Attempt to find all block argument operands for every predecessor. - // If any operand to the block argument wasn't found in a predecessor, - // conservatively add the block argument to the result set. - SmallVector operands; - bool anyUnknown = false; - for (auto iter = parentBlock->pred_begin(); - iter != parentBlock->pred_end(); iter++) { - auto branch = dyn_cast((*iter)->getTerminator()); - if (!branch) { - result.push_back(blockArg); - anyUnknown = true; - break; - } - - Value operand = branch.getSuccessorOperands( - iter.getSuccessorIndex())[blockArg.getArgNumber()]; - if (!operand) { - result.push_back(blockArg); - anyUnknown = true; - break; - } - - operands.push_back(operand); - } - - if (!anyUnknown) - llvm::append_range(workList, operands); - - continue; - } + // If this place is reached, `val` is a block argument that is not + // understood. Therefore, we conservatively interrupt. + // Note: Dealing with function arguments is not necessary, as the slice + // would have to go through an SSACopyOp first. + return WalkContinuation::interrupt(); + }); - result.push_back(current); - } while (!workList.empty()); + if (walkResult.wasInterrupted()) + return failure(); return result; } @@ -363,9 +323,14 @@ static void createNewAliasScopesFromNoAliasParameter( // Find the set of underlying pointers that this pointer is based on. 
SmallPtrSet basedOnPointers; - for (Value pointer : pointerArgs) - llvm::copy(getUnderlyingObjectSet(pointer), + for (Value pointer : pointerArgs) { + FailureOr> underlyingObjectSet = + getUnderlyingObjectSet(pointer); + if (failed(underlyingObjectSet)) + return; + llvm::copy(*underlyingObjectSet, std::inserter(basedOnPointers, basedOnPointers.begin())); + } bool aliasesOtherKnownObject = false; // Go through the based on pointers and check that they are either: diff --git a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir index 0b8b60e963bb01..a91b991c5ed2b9 100644 --- a/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir +++ b/mlir/test/Dialect/LLVMIR/inlining-alias-scopes.mlir @@ -296,6 +296,60 @@ llvm.func @bar(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) { llvm.func @random() -> i1 +llvm.func @region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) { + %0 = llvm.mlir.constant(5 : i64) : i32 + test.region_if %arg0: !llvm.ptr -> !llvm.ptr then { + ^bb0(%arg2: !llvm.ptr): + test.region_if_yield %arg0 : !llvm.ptr + } else { + ^bb0(%arg2: !llvm.ptr): + test.region_if_yield %arg0 : !llvm.ptr + } join { + ^bb0(%arg2: !llvm.ptr): + llvm.store %0, %arg2 : i32, !llvm.ptr + test.region_if_yield %arg0 : !llvm.ptr + } + llvm.return +} + +// CHECK-LABEL: llvm.func @region_branch_inlining +// CHECK: llvm.store +// CHECK-SAME: alias_scopes = [#[[$ARG0_SCOPE]]] +// CHECK-SAME: noalias_scopes = [#[[$ARG1_SCOPE]]] +llvm.func @region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) { + llvm.call @region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> () + llvm.return +} + +// ----- + +llvm.func @missing_region_branch(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) { + %0 = llvm.mlir.constant(5 : i64) : i32 + "test.one_region_op"() ({ + ^bb0(%arg2: !llvm.ptr): + llvm.store %0, %arg2 : i32, !llvm.ptr + "test.terminator"() : () -> () + }) : () -> () + 
llvm.return +} + +// CHECK-LABEL: llvm.func @missing_region_branch_inlining +// CHECK: llvm.store +// CHECK-NOT: alias_scopes +// CHECK-NOT: noalias_scopes +llvm.func @missing_region_branch_inlining(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) { + llvm.call @missing_region_branch(%arg0, %arg2) : (!llvm.ptr, !llvm.ptr) -> () + llvm.return +} + +// ----- + +// CHECK-DAG: #[[DOMAIN:.*]] = #llvm.alias_scope_domain<{{.*}}> +// CHECK-DAG: #[[$ARG0_SCOPE:.*]] = #llvm.alias_scope +// CHECK-DAG: #[[$ARG1_SCOPE:.*]] = #llvm.alias_scope + +llvm.func @random() -> i1 + llvm.func @block_arg(%arg0: !llvm.ptr {llvm.noalias}, %arg1: !llvm.ptr {llvm.noalias}) { %0 = llvm.mlir.constant(5 : i64) : i32 %1 = llvm.mlir.constant(1 : i64) : i64 From 5f15c1776a462940464743dbc9e82c46fe7e14aa Mon Sep 17 00:00:00 2001 From: Yanzuo Liu Date: Thu, 15 Aug 2024 16:46:07 +0800 Subject: [PATCH 42/47] [llvm][Docs] `_or_null` -> `_if_present` in Programmer's Manual (#98586) `cast_or_null` is deprecated. https://github.com/llvm/llvm-project/blob/062844615db5e141da118c1ad780bf102537f40a/llvm/include/llvm/Support/Casting.h#L717-L722 --- llvm/docs/ProgrammersManual.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 231de56ef4cfee..41d1388e5bf7e9 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -164,20 +164,20 @@ rarely have to include this file directly). efficient to use the ``InstVisitor`` class to dispatch over the instruction type directly. -``isa_and_nonnull<>``: - The ``isa_and_nonnull<>`` operator works just like the ``isa<>`` operator, +``isa_and_present<>``: + The ``isa_and_present<>`` operator works just like the ``isa<>`` operator, except that it allows for a null pointer as an argument (which it then returns false). This can sometimes be useful, allowing you to combine several null checks into one. 
-``cast_or_null<>``: - The ``cast_or_null<>`` operator works just like the ``cast<>`` operator, +``cast_if_present<>``: + The ``cast_if_present<>`` operator works just like the ``cast<>`` operator, except that it allows for a null pointer as an argument (which it then propagates). This can sometimes be useful, allowing you to combine several null checks into one. -``dyn_cast_or_null<>``: - The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>`` +``dyn_cast_if_present<>``: + The ``dyn_cast_if_present<>`` operator works just like the ``dyn_cast<>`` operator, except that it allows for a null pointer as an argument (which it then propagates). This can sometimes be useful, allowing you to combine several null checks into one. From 05dfac23f1121aabb9675a38628e919689f993b0 Mon Sep 17 00:00:00 2001 From: Jorge Botto <23462171+jf-botto@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:49:40 +0100 Subject: [PATCH 43/47] [DAG] Adding m_FPToUI and m_FPToSI to SDPatternMatch.h (#104044) Adds m_FPToUI/m_FPToSI matchers for ISD::FP_TO_UINT/ISD::FP_TO_SINT in SDPatternMatch.h with suitable test coverage. 
Fixes https://github.com/llvm/llvm-project/issues/103872 --- llvm/include/llvm/CodeGen/SDPatternMatch.h | 8 ++++++++ .../unittests/CodeGen/SelectionDAGPatternMatchTest.cpp | 10 ++++++++++ 2 files changed, 18 insertions(+) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 96ece1559bc437..88ddd43a2a8913 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -737,6 +737,14 @@ template inline UnaryOpc_match m_VScale(const Opnd &Op) { return UnaryOpc_match(ISD::VSCALE, Op); } +template inline UnaryOpc_match m_FPToUI(const Opnd &Op) { + return UnaryOpc_match(ISD::FP_TO_UINT, Op); +} + +template inline UnaryOpc_match m_FPToSI(const Opnd &Op) { + return UnaryOpc_match(ISD::FP_TO_SINT, Op); +} + // === Constants === struct ConstantInt_match { APInt *BindVal; diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index 074247e6e7d184..b9414be98623af 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -238,9 +238,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) { SDLoc DL; auto Int32VT = EVT::getIntegerVT(Context, 32); auto Int64VT = EVT::getIntegerVT(Context, 64); + auto FloatVT = EVT::getFloatingPointVT(32); SDValue Op0 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int32VT); SDValue Op1 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int64VT); + SDValue Op2 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, FloatVT); SDValue ZExt = DAG->getNode(ISD::ZERO_EXTEND, DL, Int64VT, Op0); SDValue SExt = DAG->getNode(ISD::SIGN_EXTEND, DL, Int64VT, Op0); @@ -252,6 +254,9 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) { SDValue VScale = DAG->getVScale(DL, Int32VT, APInt::getMaxValue(32)); + SDValue FPToSI = DAG->getNode(ISD::FP_TO_SINT, DL, FloatVT, Op2); + SDValue FPToUI = DAG->getNode(ISD::FP_TO_UINT, 
DL, FloatVT, Op2); + using namespace SDPatternMatch; EXPECT_TRUE(sd_match(ZExt, m_UnaryOp(ISD::ZERO_EXTEND, m_Value()))); EXPECT_TRUE(sd_match(SExt, m_SExt(m_Value()))); @@ -263,6 +268,11 @@ TEST_F(SelectionDAGPatternMatchTest, matchUnaryOp) { EXPECT_FALSE(sd_match(Sub, m_Neg(m_Value()))); EXPECT_FALSE(sd_match(Neg, m_Not(m_Value()))); EXPECT_TRUE(sd_match(VScale, m_VScale(m_Value()))); + + EXPECT_TRUE(sd_match(FPToUI, m_FPToUI(m_Value()))); + EXPECT_TRUE(sd_match(FPToSI, m_FPToSI(m_Value()))); + EXPECT_FALSE(sd_match(FPToUI, m_FPToSI(m_Value()))); + EXPECT_FALSE(sd_match(FPToSI, m_FPToUI(m_Value()))); } TEST_F(SelectionDAGPatternMatchTest, matchConstants) { From 9a9ce9112fd7ed87d305d3e4f9b05c98a04f2382 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 15 Aug 2024 11:01:26 +0200 Subject: [PATCH 44/47] [bazel] Port for 141536544f4ec1d1bf24256157f4ff1a3bc07dae --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ac9e311ad5110e..7273429d4f344f 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5425,8 +5425,9 @@ cc_library( includes = ["include"], deps = [ ":DataLayoutInterfaces", + ":Analysis", ":FuncDialect", - ":InliningUtils", + ":InliningUtils", ":IR", ":LLVMDialect", ":LLVMPassIncGen", From 7227b44f928a87b5d7fb05bd1539fdfb6d4958dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= Date: Thu, 15 Aug 2024 10:04:59 +0100 Subject: [PATCH 45/47] [clang][driver] Fix -print-target-triple OS version for apple targets (#104037) The target needs to be initialized in order to compute the correct target triple from the command line. Without initialized targets the OS component of the triple might not reflect what would be computed by the driver for an actual compiler invocation. 
Fixes https://github.com/llvm/llvm-project/issues/61762 --- clang/lib/Driver/Driver.cpp | 13 ++++-- .../test/Driver/darwin-print-target-triple.c | 42 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 clang/test/Driver/darwin-print-target-triple.c diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e12416e51f8d24..5b95019c25cab6 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2271,8 +2271,7 @@ bool Driver::HandleImmediateArgs(Compilation &C) { return false; } - if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { - ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); + auto initializeTargets = [&]() { const llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs())); // The 'Darwin' toolchain is initialized only when its arguments are // computed. Get the default arguments for OFK_None to ensure that @@ -2282,6 +2281,12 @@ bool Driver::HandleImmediateArgs(Compilation &C) { // FIXME: For some more esoteric targets the default toolchain is not the // correct one. 
C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_None); + return Triple; + }; + + if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { + ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); + const llvm::Triple Triple = initializeTargets(); RegisterEffectiveTriple TripleRAII(TC, Triple); switch (RLT) { case ToolChain::RLT_CompilerRT: @@ -2325,7 +2330,9 @@ bool Driver::HandleImmediateArgs(Compilation &C) { } if (C.getArgs().hasArg(options::OPT_print_target_triple)) { - llvm::outs() << TC.getTripleString() << "\n"; + initializeTargets(); + llvm::Triple Triple(TC.ComputeEffectiveClangTriple(C.getArgs())); + llvm::outs() << Triple.getTriple() << "\n"; return false; } diff --git a/clang/test/Driver/darwin-print-target-triple.c b/clang/test/Driver/darwin-print-target-triple.c new file mode 100644 index 00000000000000..4f5fdfe9d0db34 --- /dev/null +++ b/clang/test/Driver/darwin-print-target-triple.c @@ -0,0 +1,42 @@ +// Test the output of -print-target-triple on Darwin. 
+// See https://github.com/llvm/llvm-project/issues/61762 + +// +// All platforms +// + +// RUN: %clang -print-target-triple \ +// RUN: --target=x86_64-apple-macos -mmacos-version-min=15 \ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-MACOS %s +// CHECK-CLANGRT-MACOS: x86_64-apple-macosx15.0.0 + +// RUN: %clang -print-target-triple \ +// RUN: --target=arm64-apple-ios -mios-version-min=9 \ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-IOS %s +// CHECK-CLANGRT-IOS: arm64-apple-ios9.0.0 + +// RUN: %clang -print-target-triple \ +// RUN: --target=arm64-apple-watchos -mwatchos-version-min=3 \ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS %s +// CHECK-CLANGRT-WATCHOS: arm64-apple-watchos3.0.0 + +// RUN: %clang -print-target-triple \ +// RUN: --target=armv7k-apple-watchos -mwatchos-version-min=3 \ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-WATCHOS-ARMV7K %s +// CHECK-CLANGRT-WATCHOS-ARMV7K: thumbv7-apple-watchos3.0.0 + +// RUN: %clang -print-target-triple \ +// RUN: --target=arm64-apple-tvos -mtvos-version-min=1\ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-TVOS %s +// CHECK-CLANGRT-TVOS: arm64-apple-tvos1.0.0 + +// RUN: %clang -print-target-triple \ +// RUN: --target=arm64-apple-driverkit \ +// RUN: -resource-dir=%S/Inputs/resource_dir 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CLANGRT-DRIVERKIT %s +// CHECK-CLANGRT-DRIVERKIT: arm64-apple-driverkit19.0.0 From 3a3990cb05858e7892a4825c677891a980f1cea8 Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Thu, 15 Aug 2024 10:15:26 +0100 Subject: [PATCH 46/47] [Flang][OpenMP] Move assert for wrapper syms and block args to genLoopNestOp (#103731) This patch adds an assert to `genLoopNestClauses` to ensure the number of symbols and 
corresponding loop wrapper entry block arguments have the same size. This is checked by some of the callers, but it makes more sense moving it into the function itself and avoid having to replicate it. --- flang/lib/Lower/OpenMP/OpenMP.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 95c55805dcabb3..64b581e8910d07 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1371,6 +1371,9 @@ genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable, llvm::ArrayRef wrapperSyms, llvm::ArrayRef wrapperArgs, llvm::omp::Directive directive, DataSharingProcessor &dsp) { + assert(wrapperSyms.size() == wrapperArgs.size() && + "Number of symbols and wrapper block arguments must match"); + auto ivCallback = [&](mlir::Operation *op) { genLoopVars(op, converter, loc, iv, wrapperSyms, wrapperArgs); return llvm::SmallVector(iv); @@ -2083,8 +2086,6 @@ static void genCompositeDistributeSimd( llvm::concat(distributeOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); - assert(wrapperArgs.empty() && - "Block args for omp.simd and omp.distribute currently not expected"); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, /*wrapperSyms=*/{}, wrapperArgs, llvm::omp::Directive::OMPD_distribute_simd, dsp); @@ -2132,8 +2133,6 @@ static void genCompositeDoSimd(lower::AbstractConverter &converter, auto wrapperArgs = llvm::to_vector(llvm::concat( wsloopOp.getRegion().getArguments(), simdOp.getRegion().getArguments())); - assert(wsloopReductionSyms.size() == wrapperArgs.size() && - "Number of symbols and wrapper block arguments must match"); genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, item, loopNestClauseOps, iv, wsloopReductionSyms, wrapperArgs, llvm::omp::Directive::OMPD_do_simd, dsp); From 8107810cad24d41fe43c6777370c7b81ca83ad84 Mon Sep 17 00:00:00 2001 From: Justin 
Bogner Date: Thu, 15 Aug 2024 12:16:52 +0300 Subject: [PATCH 47/47] [DirectX] Use a more consistent pass name for DXILTranslateMetadata This updates the "dxil-metadata-emit" pass flag to be spelled "dxil-translate-metadata" to better match the pass name. Pull Request: https://github.com/llvm/llvm-project/pull/104249 --- llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp | 10 +++++----- llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll | 2 +- llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll | 3 +-- llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll | 2 +- llvm/test/CodeGen/DirectX/UAVMetadata.ll | 2 +- llvm/test/CodeGen/DirectX/cbuf.ll | 2 +- llvm/test/CodeGen/DirectX/dxil_ver.ll | 2 +- llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll | 2 +- llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll | 2 +- llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll | 2 +- llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll | 2 +- llvm/test/CodeGen/DirectX/lib_entry.ll | 2 +- llvm/test/Frontend/HLSL/empty_cs_entry.ll | 2 +- 20 files changed, 24 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index 583bce0f50e700..21089a232783a5 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -29,7 +29,7 @@ class DXILTranslateMetadata : public ModulePass { static char ID; // Pass identification, replacement for typeid explicit DXILTranslateMetadata() : ModulePass(ID) {} - StringRef getPassName() const override { return "DXIL 
Metadata Emit"; } + StringRef getPassName() const override { return "DXIL Translate Metadata"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -67,9 +67,9 @@ ModulePass *llvm::createDXILTranslateMetadataPass() { return new DXILTranslateMetadata(); } -INITIALIZE_PASS_BEGIN(DXILTranslateMetadata, "dxil-metadata-emit", - "DXIL Metadata Emit", false, false) +INITIALIZE_PASS_BEGIN(DXILTranslateMetadata, "dxil-translate-metadata", + "DXIL Translate Metadata", false, false) INITIALIZE_PASS_DEPENDENCY(DXILResourceMDWrapper) INITIALIZE_PASS_DEPENDENCY(ShaderFlagsAnalysisWrapper) -INITIALIZE_PASS_END(DXILTranslateMetadata, "dxil-metadata-emit", - "DXIL Metadata Emit", false, false) +INITIALIZE_PASS_END(DXILTranslateMetadata, "dxil-translate-metadata", + "DXIL Translate Metadata", false, false) diff --git a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll index b9a8e3e80567e0..318d5a6210eeea 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.0.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.0-vertex" diff --git a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll index fdd21d627829b9..fb54fa916f33f9 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/dxilVer-1.8.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.8-compute" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll 
b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll index d2625fc8b96a9d..96d04f948c9b83 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-as.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6-amplification" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll index 24eb0d608d8bb6..8cba445bcb01e8 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-cs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -dxil-prepare %s | FileCheck %s --check-prefix=REMOVE_EXTRA_ATTRIBUTE ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll index 5c28c9305f01bf..662620cf9f95cb 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-gs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.6-geometry" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll index e60023d1b3a5fb..b405f8e915a329 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-hs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S 
-dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.6-hull" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll index 7f0bea95c04822..26f3d287242edd 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-lib.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.3-library" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll index dd033b9a9722b9..422d4add912f3f 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ms.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel6.6-mesh" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll index 47da321df3e42e..cdb9a6f0f6a4f4 100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-ps.ll @@ -1,5 +1,4 @@ - -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel5.0-pixel" diff --git a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll index dcc68586dd4b9f..6b3501cc1dbaf7 
100644 --- a/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll +++ b/llvm/test/CodeGen/DirectX/Metadata/shaderModel-vs.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata %s | FileCheck %s ; RUN: opt -S -passes="print" -disable-output %s 2>&1 | FileCheck %s --check-prefix=ANALYSIS target triple = "dxil-pc-shadermodel-vertex" diff --git a/llvm/test/CodeGen/DirectX/UAVMetadata.ll b/llvm/test/CodeGen/DirectX/UAVMetadata.ll index bdad9fd40c9bd3..b10112a044df58 100644 --- a/llvm/test/CodeGen/DirectX/UAVMetadata.ll +++ b/llvm/test/CodeGen/DirectX/UAVMetadata.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s ; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT ; RUN: llc %s --filetype=asm -o - < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,PRINT diff --git a/llvm/test/CodeGen/DirectX/cbuf.ll b/llvm/test/CodeGen/DirectX/cbuf.ll index 38f08fad995d1f..e31a659728fcf2 100644 --- a/llvm/test/CodeGen/DirectX/cbuf.ll +++ b/llvm/test/CodeGen/DirectX/cbuf.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD ; RUN: opt -S --passes="print-dxil-resource-md" < %s 2>&1 | FileCheck %s --check-prefix=PRINT target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" diff --git a/llvm/test/CodeGen/DirectX/dxil_ver.ll b/llvm/test/CodeGen/DirectX/dxil_ver.ll index e9923a3abce02d..3c1d2e81020098 100644 --- a/llvm/test/CodeGen/DirectX/dxil_ver.ll +++ b/llvm/test/CodeGen/DirectX/dxil_ver.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = 
"dxil-pc-shadermodel6.3-library" diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll index 0cfb839746b93e..1a618092c5fed3 100644 --- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_0.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll index b6d29f8d18d79f..6886f2690209dc 100644 --- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_1.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll index d023d7906fdc52..3b08b25542201c 100644 --- a/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_2.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" diff --git a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll index 38c2cd18b5ca1d..f01afbdab96733 100644 --- 
a/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll +++ b/llvm/test/CodeGen/DirectX/legacy_cb_layout_3.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXILMD +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXILMD target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" diff --git a/llvm/test/CodeGen/DirectX/lib_entry.ll b/llvm/test/CodeGen/DirectX/lib_entry.ll index 5254a088055888..9aa63c26ce845a 100644 --- a/llvm/test/CodeGen/DirectX/lib_entry.ll +++ b/llvm/test/CodeGen/DirectX/lib_entry.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -dxil-metadata-emit < %s | FileCheck %s +; RUN: opt -S -dxil-translate-metadata < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" target triple = "dxil-unknown-shadermodel6.7-library" diff --git a/llvm/test/Frontend/HLSL/empty_cs_entry.ll b/llvm/test/Frontend/HLSL/empty_cs_entry.ll index 45b0faeaa44d44..32736aeeb542ce 100644 --- a/llvm/test/Frontend/HLSL/empty_cs_entry.ll +++ b/llvm/test/Frontend/HLSL/empty_cs_entry.ll @@ -1,4 +1,4 @@ -; RUN: %if directx-registered-target %{ opt -S -dxil-metadata-emit < %s | FileCheck %s --check-prefix=DXIL-CHECK %} +; RUN: %if directx-registered-target %{ opt -S -dxil-translate-metadata < %s | FileCheck %s --check-prefix=DXIL-CHECK %} ; RUN: %if spirv-registered-target %{ llc %s -mtriple=spirv-unknown-unknown -o - | FileCheck %s --check-prefix=SPIRV-CHECK %} target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"