diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b93220e873f61c..b532c9f451f1a3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -29,6 +29,9 @@ void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
+FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
+FunctionPass *createAMDGPURegBankSelectPass();
+FunctionPass *createAMDGPURegBankLegalizePass();
 
 // SI Passes
 FunctionPass *createGCNDPPCombinePass();
@@ -36,7 +39,6 @@ FunctionPass *createSIAnnotateControlFlowLegacyPass();
 FunctionPass *createSIFoldOperandsLegacyPass();
 FunctionPass *createSIPeepholeSDWALegacyPass();
 FunctionPass *createSILowerI1CopiesLegacyPass();
-FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
 FunctionPass *createSIShrinkInstructionsLegacyPass();
 FunctionPass *createSILoadStoreOptimizerLegacyPass();
 FunctionPass *createSIWholeQuadModePass();
@@ -186,6 +188,12 @@ extern char &SILowerI1CopiesLegacyID;
 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
 extern char &AMDGPUGlobalISelDivergenceLoweringID;
 
+void initializeAMDGPURegBankSelectPass(PassRegistry &);
+extern char &AMDGPURegBankSelectID;
+
+void initializeAMDGPURegBankLegalizePass(PassRegistry &);
+extern char &AMDGPURegBankLegalizeID;
+
 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
 extern char &AMDGPUMarkLastScratchLoadID;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
new file mode 100644
index 00000000000000..283173deaeedcb
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -0,0 +1,79 @@
+//===-- AMDGPURegBankLegalize.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Lower G_ instructions that can't be inst-selected with the register bank
+/// assignment from AMDGPURegBankSelect, which is based on machine uniformity
+/// info. Given the types of all operands, some register bank assignments
+/// require lowering while others do not.
+/// Note: cases where all register bank assignments would require lowering are
+/// lowered in the legalizer.
+/// For example, vgpr S64 G_AND requires lowering to S32 while sgpr S64 does
+/// not. Eliminate sgpr S1 by lowering it to sgpr S32.
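+///
+/// A rough sketch of the intended vgpr S64 G_AND lowering (illustrative MIR
+/// only; this skeleton does not perform it yet):
+///   %and:vgpr(s64) = G_AND %a:vgpr(s64), %b:vgpr(s64)
+/// would be rewritten as
+///   %a0:vgpr(s32), %a1:vgpr(s32) = G_UNMERGE_VALUES %a:vgpr(s64)
+///   %b0:vgpr(s32), %b1:vgpr(s32) = G_UNMERGE_VALUES %b:vgpr(s64)
+///   %and0:vgpr(s32) = G_AND %a0, %b0
+///   %and1:vgpr(s32) = G_AND %a1, %b1
+///   %and:vgpr(s64) = G_MERGE_VALUES %and0(s32), %and1(s32)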
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "amdgpu-regbanklegalize"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPURegBankLegalize : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPURegBankLegalize() : MachineFunctionPass(ID) {
+    initializeAMDGPURegBankLegalizePass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "AMDGPU Register Bank Legalize";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // If there were no PHIs and we did waterfall expansion, the machine verifier
+  // would fail, so clear the NoPHIs property here.
+  MachineFunctionProperties getClearedProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoPHIs);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE,
+                      "AMDGPU Register Bank Legalize", false, false)
+INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE,
+                    "AMDGPU Register Bank Legalize", false, false)
+
+char AMDGPURegBankLegalize::ID = 0;
+
+char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID;
+
+FunctionPass *llvm::createAMDGPURegBankLegalizePass() {
+  return new AMDGPURegBankLegalize();
+}
+
+using namespace AMDGPU;
+
+bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
new file mode 100644
index 00000000000000..4d64caa354b104
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
@@ -0,0 +1,74 @@
+//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Assign register banks to all register operands of G_ instructions using
+/// machine uniformity analysis.
+/// SGPR - uniform values and some lane masks
+/// VGPR - divergent, non-S1 values
+/// VCC  - divergent S1 values (lane masks)
+/// However, in some cases G_ instructions with this register bank assignment
+/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
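+///
+/// An illustrative sketch of the intended assignment (assumed MIR, for
+/// exposition only; this skeleton does not assign banks yet):
+///   %c:sgpr(s32) = G_ADD %a:sgpr(s32), %b:sgpr(s32)    uniform -> sgpr
+///   %z:vgpr(s32) = G_ADD %x:vgpr(s32), %y:vgpr(s32)    divergent -> vgpr
+///   %m:vcc(s1) = G_ICMP intpred(eq), %x:vgpr(s32), %y:vgpr(s32)
+///                                                      divergent lane mask -> vcc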
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "amdgpu-regbankselect"
+
+using namespace llvm;
+
+namespace {
+
+class AMDGPURegBankSelect : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPURegBankSelect() : MachineFunctionPass(ID) {
+    initializeAMDGPURegBankSelectPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "AMDGPU Register Bank Select";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  // This pass assigns register banks to all virtual registers, and we maintain
+  // this property in subsequent passes.
+  MachineFunctionProperties getSetProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::RegBankSelected);
+  }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
+                      "AMDGPU Register Bank Select", false, false)
+INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
+                    "AMDGPU Register Bank Select", false, false)
+
+char AMDGPURegBankSelect::ID = 0;
+
+char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;
+
+FunctionPass *llvm::createAMDGPURegBankSelectPass() {
+  return new AMDGPURegBankSelect();
+}
+
+bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+  if (MF.getProperties().hasProperty(
+          MachineFunctionProperties::Property::FailedISel))
+    return false;
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 51688fb9807adf..2e5fca22e1cfb4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -443,6 +443,12 @@ static cl::opt<bool>
     cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden);
 
+static cl::opt<bool> NewRegBankSelect(
+    "new-reg-bank-select",
+    cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "
+             "regbankselect"),
+    cl::init(false), cl::Hidden);
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   // Register the target
   RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
@@ -459,6 +465,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNDPPCombineLegacyPass(*PR);
   initializeSILowerI1CopiesLegacyPass(*PR);
   initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
+  initializeAMDGPURegBankSelectPass(*PR);
+  initializeAMDGPURegBankLegalizePass(*PR);
   initializeSILowerWWMCopiesPass(*PR);
   initializeAMDGPUMarkLastScratchLoadPass(*PR);
   initializeSILowerSGPRSpillsLegacyPass(*PR);
@@ -1370,7 +1378,12 @@ void GCNPassConfig::addPreRegBankSelect() {
 }
 
 bool GCNPassConfig::addRegBankSelect() {
-  addPass(new RegBankSelect());
+  if (NewRegBankSelect) {
+    addPass(createAMDGPURegBankSelectPass());
+    addPass(createAMDGPURegBankLegalizePass());
+  } else {
+    addPass(new RegBankSelect());
+  }
   return false;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index b0197c3c6c280a..68d141e338a882 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -92,6 +92,8 @@ add_llvm_target(AMDGPUCodeGen
   AMDGPUPromoteAlloca.cpp
   AMDGPUPromoteKernelArguments.cpp
   AMDGPURegBankCombiner.cpp
+  AMDGPURegBankLegalize.cpp
+  AMDGPURegBankSelect.cpp
AMDGPURegisterBankInfo.cpp AMDGPURemoveIncompatibleFunctions.cpp AMDGPUReserveWWMRegs.cpp diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir new file mode 100644 index 00000000000000..880057813adf54 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbanklegalize.mir @@ -0,0 +1,858 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FPTOUI %0(s32) + %7:_(s32) = G_ADD %6, %1 + G_STORE %7(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FADD %0, %1 + %8:_(s32) = G_FPTOUI %7(s32) + %9:_(s32) = G_ADD %8, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: buffer_load_uniform + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $sgpr4 + %7:_(s32) = COPY $vgpr0 + %8:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_divergent +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: buffer_load_divergent + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(s32) = COPY $vgpr2 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; CHECK-LABEL: name: vgpr_and_i64 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %5:_(s32) = COPY $vgpr2 + %6:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr4 + %8:_(s32) = COPY $vgpr5 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %10:_(s64) = G_AND %0, %1 + G_STORE %10(s64), %2(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: abs_sgpr_i16 + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) + ; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s16) = G_ABS %0 + %7:_(s32) = G_ANYEXT %6(s16) + G_STORE %7(s32), %1(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uniform_i1_phi + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x30000000), %bb.3(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s1) = G_ICMP intpred(uge), %1(s32), %6 + %8:_(s32) = G_CONSTANT i32 0 + %9:_(s1) = G_ICMP intpred(ne), %2(s32), %8 + G_BRCOND %9(s1), %bb.3 + G_BR %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %19:_(s32) = G_CONSTANT i32 1 + %11:_(s1) = G_ICMP intpred(ult), %1(s32), %19 + + bb.3: + %12:_(s1) = G_PHI %7(s1), %bb.1, %11(s1), %bb.2 + %17:_(s32) = G_SEXT %12(s1) + %18:_(s32) = G_CONSTANT i32 2 + %13:_(s32) = G_ADD %17, %18 + G_STORE %13(s32), %0(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vcc_to_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: vcc_to_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FCONSTANT float 0.000000e+00 + %8:_(s1) = G_FCMP floatpred(oeq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: scc_to_vcc + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(eq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: vgpr_to_vcc_trunc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s32) = COPY $vgpr4 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s1) = G_TRUNC %0(s32) + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: zext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: zext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_ZEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: sext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: sext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_SEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: and_i1_vcc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: and_i1_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: and_i1_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_phi_with_uniform_inputs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 0 + %6:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %5 + %8:sreg_32_xm0_xexec(s32) = SI_IF %6(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %13:_(s32) = G_CONSTANT i32 1 + + bb.3: + %9:_(s32) = G_PHI %5(s32), %bb.1, %13(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %8(s32) + G_STORE %9(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]] + ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %18:_(s32) = G_CONSTANT i32 -1 + %17:_(s32) = G_CONSTANT i32 0 + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %5:_(s32) = G_PHI %11(s32), %bb.2, %17(s32), %bb.1 + %6:_(s32) = G_PHI %18(s32), %bb.1, %8(s32), %bb.2 + %22:_(s32) = G_CONSTANT i32 1 + %8:_(s32) = G_ADD %6, %22 + %9:_(s32) = G_UITOFP %8(s32) + %10:_(s1) = G_FCMP floatpred(ogt), %9(s32), %0 + %11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %5(s32) + SI_LOOP %11(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.3 + + bb.3: + %13:_(s32) = G_PHI %8(s32), %bb.2 + %14:_(s32) = G_PHI %11(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %21:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %21 + G_STORE %16(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: loop_with_2breaks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF 
[[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + 
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir new file mode 100644 index 00000000000000..880057813adf54 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-regbankselect.mir @@ -0,0 +1,858 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=none %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FPTOUI %0(s32) + %7:_(s32) = G_ADD %6, %1 + G_STORE %7(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: back_to_back_uniform_in_vgpr + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FADD %0, %1 + %8:_(s32) = G_FPTOUI %7(s32) + %9:_(s32) = G_ADD %8, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: buffer_load_uniform + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $sgpr4 + %7:_(s32) = COPY $vgpr0 + %8:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: buffer_load_divergent +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: buffer_load_divergent + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(s32) = COPY $vgpr2 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; CHECK-LABEL: name: vgpr_and_i64 + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[MV1]] + ; CHECK-NEXT: G_STORE [[AND]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %5:_(s32) = COPY $vgpr2 + %6:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr4 + %8:_(s32) = COPY $vgpr5 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %10:_(s64) = G_AND %0, %1 + G_STORE %10(s64), %2(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: abs_sgpr_i16 + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[ABS:%[0-9]+]]:_(s16) = G_ABS [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ABS]](s16) + ; CHECK-NEXT: G_STORE [[ANYEXT]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s16) = G_ABS %0 + %7:_(s32) = G_ANYEXT %6(s16) + G_STORE %7(s32), %1(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: uniform_i1_phi + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[PHI]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C3]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x30000000), %bb.3(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s1) = G_ICMP intpred(uge), %1(s32), %6 + %8:_(s32) = G_CONSTANT i32 0 + %9:_(s1) = G_ICMP intpred(ne), %2(s32), %8 + G_BRCOND %9(s1), %bb.3 + G_BR %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %19:_(s32) = G_CONSTANT i32 1 + %11:_(s1) = G_ICMP intpred(ult), %1(s32), %19 + + bb.3: + %12:_(s1) = G_PHI %7(s1), %bb.1, %11(s1), %bb.2 + %17:_(s32) = G_SEXT %12(s1) + %18:_(s32) = G_CONSTANT i32 2 + %13:_(s32) = G_ADD %17, %18 + G_STORE %13(s32), %0(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vcc_to_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: vcc_to_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FCONSTANT float 0.000000e+00 + %8:_(s1) = G_FCMP floatpred(oeq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: scc_to_vcc + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(eq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; CHECK-LABEL: name: vgpr_to_vcc_trunc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[COPY1]], [[COPY2]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s32) = COPY $vgpr4 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s1) = G_TRUNC %0(s32) + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: zext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: zext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[ZEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_ZEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: sext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: sext + ; CHECK: liveins: $sgpr0, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_SEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; CHECK-LABEL: name: and_i1_vcc + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: and_i1_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: and_i1_scc + ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_phi_with_uniform_inputs + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: G_STORE [[PHI]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 0 + %6:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %5 + %8:sreg_32_xm0_xexec(s32) = SI_IF %6(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %13:_(s32) = G_CONSTANT i32 1 + + bb.3: + %9:_(s32) = G_PHI %5(s32), %bb.1, %13(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %8(s32) + G_STORE %9(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
+ +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: divergent_because_of_temporal_divergent_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C2]] + ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[ADD]](s32) + ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.1 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[PHI2]], [[C3]] + ; CHECK-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %18:_(s32) = G_CONSTANT i32 -1 + %17:_(s32) = G_CONSTANT i32 0 + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %5:_(s32) = G_PHI %11(s32), %bb.2, %17(s32), %bb.1 + %6:_(s32) = G_PHI %18(s32), %bb.1, %8(s32), %bb.2 + %22:_(s32) = G_CONSTANT i32 1 + %8:_(s32) = G_ADD %6, %22 + %9:_(s32) = G_UITOFP %8(s32) + %10:_(s1) = G_FCMP floatpred(ogt), %9(s32), %0 + %11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %5(s32) + SI_LOOP %11(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.3 + + bb.3: + %13:_(s32) = G_PHI %8(s32), %bb.2 + %14:_(s32) = G_PHI %11(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %21:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %21 + G_STORE %16(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
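+
+# Note on the test above: %8 (the G_ADD) is uniform on every iteration, but
+# its use in bb.3 sits outside the loop, and lanes can leave the loop on
+# different iterations, so machine uniformity analysis marks that use
+# divergent (temporal divergence). The corresponding llc test carries a
+# "TODO: implement temporal divergence lowering" for this case.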
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: loop_with_2breaks + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[C5]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[COPY9]](s1) + ; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF 
[[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY11]](s1), [[PHI1]](s32) + ; CHECK-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: G_BR %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C7]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY9]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY13]](s1) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; CHECK-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc + ; CHECK-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc + ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INT]](s32), %bb.3 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + 
%8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll new file mode 100644 index 00000000000000..0b4eb458b254fd --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS_GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=OLD_RBS_GFX12 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS_GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=NEW_RBS_GFX12 %s + +define amdgpu_ps void @salu_float(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS_GFX10-LABEL: salu_float: +; OLD_RBS_GFX10: ; %bb.0: +; OLD_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1 +; OLD_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 +; OLD_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; OLD_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS_GFX10-NEXT: s_endpgm +; +; OLD_RBS_GFX12-LABEL: salu_float: +; OLD_RBS_GFX12: ; %bb.0: +; OLD_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1 +; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; OLD_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0 +; OLD_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2 +; OLD_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; OLD_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off +; OLD_RBS_GFX12-NEXT: s_endpgm +; +; NEW_RBS_GFX10-LABEL: salu_float: +; NEW_RBS_GFX10: ; %bb.0: +; NEW_RBS_GFX10-NEXT: v_add_f32_e64 v2, s0, s1 +; NEW_RBS_GFX10-NEXT: v_cvt_u32_f32_e32 v2, v2 +; NEW_RBS_GFX10-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; NEW_RBS_GFX10-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS_GFX10-NEXT: s_endpgm +; +; NEW_RBS_GFX12-LABEL: salu_float: +; NEW_RBS_GFX12: ; %bb.0: +; NEW_RBS_GFX12-NEXT: s_add_f32 s0, s0, s1 +; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) +; NEW_RBS_GFX12-NEXT: s_cvt_u32_f32 s0, s0 +; NEW_RBS_GFX12-NEXT: s_add_co_i32 s0, s0, s2 +; NEW_RBS_GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; NEW_RBS_GFX12-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS_GFX12-NEXT: global_store_b32 v[0:1], v2, off +; NEW_RBS_GFX12-NEXT: s_endpgm + %add = fadd float %a, %b + %add.i32 = fptoui float %add to i32 + %res = add i32 %add.i32, %c + store i32 %res, ptr addrspace(1) %ptr + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir new file mode 100644 index 00000000000000..2c996c443a427e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui-salu-float.mir @@ -0,0 +1,92 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX10 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=OLD_RBS_GFX12 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=NEW_RBS_GFX10 +# 
RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=NEW_RBS_GFX12 + +--- +name: salu_float +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS_GFX10-LABEL: name: salu_float + ; OLD_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS_GFX10-NEXT: {{ $}} + ; OLD_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX10-NEXT: S_ENDPGM 0 + ; + ; OLD_RBS_GFX12-LABEL: name: salu_float + ; OLD_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS_GFX12-NEXT: {{ $}} + ; OLD_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] + ; OLD_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; OLD_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; OLD_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS_GFX12-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS_GFX10-LABEL: name: salu_float + ; NEW_RBS_GFX10: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS_GFX10-NEXT: {{ $}} + ; NEW_RBS_GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS_GFX10-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS_GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS_GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS_GFX10-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS_GFX10-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS_GFX10-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS_GFX10-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS_GFX10-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; NEW_RBS_GFX10-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX10-NEXT: S_ENDPGM 0 + ; 
+ ; NEW_RBS_GFX12-LABEL: name: salu_float + ; NEW_RBS_GFX12: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS_GFX12-NEXT: {{ $}} + ; NEW_RBS_GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS_GFX12-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS_GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS_GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS_GFX12-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS_GFX12-NEXT: [[FADD:%[0-9]+]]:sgpr(s32) = G_FADD [[COPY]], [[COPY1]] + ; NEW_RBS_GFX12-NEXT: [[FPTOUI:%[0-9]+]]:sgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS_GFX12-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[FPTOUI]], [[COPY2]] + ; NEW_RBS_GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; NEW_RBS_GFX12-NEXT: G_STORE [[COPY5]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS_GFX12-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FADD %0, %1 + %8:_(s32) = G_FPTOUI %7(s32) + %9:_(s32) = G_ADD %8, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll new file mode 100644 index 00000000000000..287a8ab0e52f52 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll @@ -0,0 +1,635 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=OLD_RBS %s +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=NEW_RBS %s + +; if instruction is uniform and there is available instruction, select SALU instruction +define amdgpu_ps void @uniform_in_vgpr(float inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: uniform_in_vgpr: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0 +; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: uniform_in_vgpr: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, s0 +; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s1, v2 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %a.i32 = fptoui float %a to i32 + %res = add i32 %a.i32, %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; copy sgpr to vgpr + readfirstlane vgpr to sgpr combine from rb-legalize +define amdgpu_ps void @back_to_back_uniform_in_vgpr(float inreg %a, float inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: back_to_back_uniform_in_vgpr: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_add_f32_e64 v2, s0, s1 +; OLD_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2 +; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: back_to_back_uniform_in_vgpr: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_add_f32_e64 v2, s0, s1 +; NEW_RBS-NEXT: v_cvt_u32_f32_e32 v2, v2 +; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, s2, v2 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %add = fadd float %a, %b + %add.i32 = fptoui float %add to i32 + %res = add i32 %add.i32, %c + store i32 %res, ptr 
addrspace(1) %ptr
+  ret void
+}
+
+; fast rules for vector instructions
+define amdgpu_cs void @buffer_load_uniform(<4 x i32> inreg %rsrc, i32 inreg %voffset, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: buffer_load_uniform:
+; OLD_RBS: ; %bb.0: ; %.entry
+; OLD_RBS-NEXT: v_mov_b32_e32 v2, s4
+; OLD_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
+; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
+; OLD_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3
+; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: buffer_load_uniform:
+; NEW_RBS: ; %bb.0: ; %.entry
+; NEW_RBS-NEXT: v_mov_b32_e32 v2, s4
+; NEW_RBS-NEXT: buffer_load_dwordx4 v[2:5], v2, s[0:3], 0 offen
+; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
+; NEW_RBS-NEXT: v_add_nc_u32_e32 v2, 1, v3
+; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
+; NEW_RBS-NEXT: s_endpgm
+.entry:
+  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+  %el1 = extractelement <4 x i32> %vec, i64 1
+  %res = add i32 %el1, 1
+  store i32 %res, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_cs void @buffer_load_divergent(<4 x i32> inreg %rsrc, i32 %voffset, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: buffer_load_divergent:
+; OLD_RBS: ; %bb.0: ; %.entry
+; OLD_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
+; OLD_RBS-NEXT: s_waitcnt vmcnt(0)
+; OLD_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4
+; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: buffer_load_divergent:
+; NEW_RBS: ; %bb.0: ; %.entry
+; NEW_RBS-NEXT: buffer_load_dwordx4 v[3:6], v0, s[0:3], 0 offen
+; NEW_RBS-NEXT: s_waitcnt vmcnt(0)
+; NEW_RBS-NEXT: v_add_nc_u32_e32 v0, 1, v4
+; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
+; NEW_RBS-NEXT: s_endpgm
+.entry:
+  %vec = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
+  %el1 = extractelement <4 x i32> %vec, i64 1
+  %res = add i32 %el1, 1
+  store i32 %res, ptr addrspace(1) %ptr
+  ret void
+}
+
+; lowering in rb-legalize (sgpr S64 is legal, vgpr has to be split to S32)
+define amdgpu_ps void @vgpr_and_i64(i64 %a, i64 %b, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: vgpr_and_i64:
+; OLD_RBS: ; %bb.0:
+; OLD_RBS-NEXT: v_and_b32_e32 v0, v0, v2
+; OLD_RBS-NEXT: v_and_b32_e32 v1, v1, v3
+; OLD_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: vgpr_and_i64:
+; NEW_RBS: ; %bb.0:
+; NEW_RBS-NEXT: v_and_b32_e32 v0, v0, v2
+; NEW_RBS-NEXT: v_and_b32_e32 v1, v1, v3
+; NEW_RBS-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; NEW_RBS-NEXT: s_endpgm
+  %res = and i64 %a, %b
+  store i64 %res, ptr addrspace(1) %ptr
+  ret void
+}
+
+; It is up to the user instruction to deal with potentially truncated bits in a reg.
+; Here G_ABS needs to sign extend the S16 in reg to S32 and then do S32 G_ABS.
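+; A rough MIR-level sketch of that lowering (register names illustrative):
+;   %sext:sgpr(s32) = G_SEXT_INREG %in, 16   ; becomes s_sext_i32_i16
+;   %abs:sgpr(s32) = G_ABS %sext             ; becomes s_abs_i32
+; as the s_sext_i32_i16 / s_abs_i32 pair in the checks below shows.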
+define amdgpu_ps void @abs_sgpr_i16(i16 inreg %arg, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: abs_sgpr_i16: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_sext_i32_i16 s0, s0 +; OLD_RBS-NEXT: s_abs_i32 s0, s0 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_short v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: abs_sgpr_i16: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_sext_i32_i16 s0, s0 +; NEW_RBS-NEXT: s_abs_i32 s0, s0 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_short v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %res = call i16 @llvm.abs.i16(i16 %arg, i1 false) + store i16 %res, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_ps void @uniform_i1_phi(ptr addrspace(1) %out, i32 inreg %tid, i32 inreg %cond) { +; OLD_RBS-LABEL: uniform_i1_phi: +; OLD_RBS: ; %bb.0: ; %A +; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 6 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: s_cmp_lg_u32 s1, 0 +; OLD_RBS-NEXT: s_cbranch_scc1 .LBB6_2 +; OLD_RBS-NEXT: ; %bb.1: ; %B +; OLD_RBS-NEXT: s_cmp_lt_u32 s0, 1 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: .LBB6_2: ; %exit +; OLD_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000 +; OLD_RBS-NEXT: s_add_i32 s0, s0, 2 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: uniform_i1_phi: +; NEW_RBS: ; %bb.0: ; %A +; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 6 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: s_cmp_lg_u32 s1, 0 +; NEW_RBS-NEXT: s_cbranch_scc1 .LBB6_2 +; NEW_RBS-NEXT: ; %bb.1: ; %B +; NEW_RBS-NEXT: s_cmp_lt_u32 s0, 1 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: .LBB6_2: ; %exit +; NEW_RBS-NEXT: s_bfe_i32 s0, s2, 0x10000 +; NEW_RBS-NEXT: s_add_i32 s0, s0, 2 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm +A: + %val_A = icmp uge i32 %tid, 6 + %cmp = icmp eq i32 %cond, 0 + br i1 %cmp, label %B, label %exit + +B: + %val_B = icmp ult i32 %tid, 1 + br label %exit + +exit: + %phi = phi i1 [ %val_A, %A ], [ %val_B, %B ] + %sel = select i1 %phi, i32 1, i32 2 + store i32 %sel, ptr addrspace(1) %out + ret void +} + +; this is kind of i1 readfirstlane +; uniform i1 result on instruction that is only available on VALU +define amdgpu_ps void @vcc_to_scc(float inreg %a, i32 inreg %b, i32 inreg %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: vcc_to_scc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s2 +; OLD_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; OLD_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: vcc_to_scc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s2 +; NEW_RBS-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; NEW_RBS-NEXT: v_cndmask_b32_e64 v2, v2, s1, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %vcc_to_scc = fcmp oeq float %a, 0.0 + %select = select i1 %vcc_to_scc, i32 %b, i32 %c + store i32 %select, ptr addrspace(1) %ptr + ret void +} + +; combiner in rb-legalize recognizes sgpr S1 to vcc copy +define amdgpu_ps void @scc_to_vcc(i32 inreg %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: scc_to_vcc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 0 +; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0 +; OLD_RBS-NEXT: s_and_b32 s0, 1, s0 +; OLD_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 +; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off +; OLD_RBS-NEXT: s_endpgm +; 
+; NEW_RBS-LABEL: scc_to_vcc:
+; NEW_RBS: ; %bb.0:
+; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 0
+; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
+; NEW_RBS-NEXT: s_and_b32 s0, 1, s0
+; NEW_RBS-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0
+; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off
+; NEW_RBS-NEXT: s_endpgm
+  %scc_to_vcc = icmp eq i32 %a, 0
+  %select = select i1 %scc_to_vcc, i32 %b, i32 %c
+  store i32 %select, ptr addrspace(1) %ptr
+  ret void
+}
+
+; this is the only G_TRUNC that is not a no-op in global-isel for AMDGPU
+define amdgpu_ps void @vgpr_to_vcc_trunc(i32 %a, i32 %b, i32 %c, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: vgpr_to_vcc_trunc:
+; OLD_RBS: ; %bb.0:
+; OLD_RBS-NEXT: v_and_b32_e32 v0, 1, v0
+; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; OLD_RBS-NEXT: global_store_dword v[3:4], v0, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: vgpr_to_vcc_trunc:
+; NEW_RBS: ; %bb.0:
+; NEW_RBS-NEXT: v_and_b32_e32 v0, 1, v0
+; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; NEW_RBS-NEXT: global_store_dword v[3:4], v0, off
+; NEW_RBS-NEXT: s_endpgm
+  %vcc = trunc i32 %a to i1
+  %select = select i1 %vcc, i32 %b, i32 %c
+  store i32 %select, ptr addrspace(1) %ptr
+  ret void
+}
+
+; an i1 input to zext and sext is something that survived the legalizer (not a trunc);
+; lower it to a select
+define amdgpu_ps void @zext(i32 inreg %a, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: zext:
+; OLD_RBS: ; %bb.0:
+; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10
+; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0
+; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
+; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: zext:
+; NEW_RBS: ; %bb.0:
+; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10
+; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
+; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
+; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
+; NEW_RBS-NEXT: s_endpgm
+  %bool = icmp eq i32 %a, 10
+  %zext = zext i1 %bool to i32
+  store i32 %zext, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_ps void @sext(i32 inreg %a, ptr addrspace(1) %ptr) {
+; OLD_RBS-LABEL: sext:
+; OLD_RBS: ; %bb.0:
+; OLD_RBS-NEXT: s_cmp_eq_u32 s0, 10
+; OLD_RBS-NEXT: s_cselect_b32 s0, 1, 0
+; OLD_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000
+; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0
+; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: sext:
+; NEW_RBS: ; %bb.0:
+; NEW_RBS-NEXT: s_cmp_eq_u32 s0, 10
+; NEW_RBS-NEXT: s_cselect_b32 s0, 1, 0
+; NEW_RBS-NEXT: s_bfe_i32 s0, s0, 0x10000
+; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0
+; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off
+; NEW_RBS-NEXT: s_endpgm
+  %bool = icmp eq i32 %a, 10
+  %sext = sext i1 %bool to i32
+  store i32 %sext, ptr addrspace(1) %ptr
+  ret void
+}
+
+; divergent i1 bitwise, i1 vcc.
+; inst-selected into s_and_b32 on wave32 or s_and_b64 on wave64.
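+; Lane masks live in scalar registers, so even this divergent i1 AND is a
+; scalar instruction; wave32 sketch (names illustrative):
+;   %cc:vcc(s1) = G_AND %cmp_a(s1), %cmp_b(s1)   ; -> s_and_b32 vcc_lo, ...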
+define amdgpu_ps void @and_i1_vcc(i32 %a, i32 %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: and_i1_vcc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0 +; OLD_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1 +; OLD_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; OLD_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; OLD_RBS-NEXT: global_store_dword v[2:3], v0, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: and_i1_vcc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: v_cmp_le_u32_e32 vcc_lo, 10, v0 +; NEW_RBS-NEXT: v_cmp_le_u32_e64 s0, 20, v1 +; NEW_RBS-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 +; NEW_RBS-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc_lo +; NEW_RBS-NEXT: global_store_dword v[2:3], v0, off +; NEW_RBS-NEXT: s_endpgm + %cmp_a = icmp uge i32 %a, 10 + %cmp_b = icmp uge i32 %b, 20 + %cc = and i1 %cmp_a, %cmp_b + %res = select i1 %cc, i32 %a, i32 %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; uniform i1 bitwise, i32 sgpr. inst selected into s_and_b32. +define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %ptr) { +; OLD_RBS-LABEL: and_i1_scc: +; OLD_RBS: ; %bb.0: +; OLD_RBS-NEXT: s_cmp_ge_u32 s0, 10 +; OLD_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; OLD_RBS-NEXT: s_cmp_ge_u32 s1, 20 +; OLD_RBS-NEXT: s_cselect_b32 s3, 1, 0 +; OLD_RBS-NEXT: s_and_b32 s2, s2, s3 +; OLD_RBS-NEXT: s_and_b32 s2, s2, 1 +; OLD_RBS-NEXT: s_cmp_lg_u32 s2, 0 +; OLD_RBS-NEXT: s_cselect_b32 s0, s0, s1 +; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0 +; OLD_RBS-NEXT: global_store_dword v[0:1], v2, off +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: and_i1_scc: +; NEW_RBS: ; %bb.0: +; NEW_RBS-NEXT: s_cmp_ge_u32 s0, 10 +; NEW_RBS-NEXT: s_cselect_b32 s2, 1, 0 +; NEW_RBS-NEXT: s_cmp_ge_u32 s1, 20 +; NEW_RBS-NEXT: s_cselect_b32 s3, 1, 0 +; NEW_RBS-NEXT: s_and_b32 s2, s2, s3 +; NEW_RBS-NEXT: s_and_b32 s2, s2, 1 +; NEW_RBS-NEXT: s_cmp_lg_u32 s2, 0 +; NEW_RBS-NEXT: s_cselect_b32 s0, s0, s1 +; NEW_RBS-NEXT: v_mov_b32_e32 v2, s0 +; NEW_RBS-NEXT: global_store_dword v[0:1], v2, off +; NEW_RBS-NEXT: s_endpgm + %cmp_a = icmp uge i32 %a, 10 + %cmp_b = icmp uge i32 %b, 20 + %cc = and i1 %cmp_a, %cmp_b + %res = select i1 %cc, i32 %a, i32 %b + store i32 %res, ptr addrspace(1) %ptr + ret void +} + +; old RBS selects sgpr phi because it had sgpr inputs. 
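+; new RBS uses machine uniformity info instead: a phi at a join of divergent
+; control flow is divergent even when all of its inputs are uniform, so it is
+; expected to get a vgpr (note the v_mov_b32 feeding the global_store in the
+; checks below).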
+define amdgpu_ps void @divergent_phi_with_uniform_inputs(i32 %a, ptr addrspace(1) %out) {
+; OLD_RBS-LABEL: divergent_phi_with_uniform_inputs:
+; OLD_RBS: ; %bb.0: ; %A
+; OLD_RBS-NEXT: s_mov_b32 s0, 0
+; OLD_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; OLD_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; OLD_RBS-NEXT: ; %bb.1: ; %B
+; OLD_RBS-NEXT: s_mov_b32 s0, 1
+; OLD_RBS-NEXT: ; %bb.2: ; %exit
+; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; OLD_RBS-NEXT: v_mov_b32_e32 v0, s0
+; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: divergent_phi_with_uniform_inputs:
+; NEW_RBS: ; %bb.0: ; %A
+; NEW_RBS-NEXT: s_mov_b32 s0, 0
+; NEW_RBS-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; NEW_RBS-NEXT: s_and_saveexec_b32 s1, vcc_lo
+; NEW_RBS-NEXT: ; %bb.1: ; %B
+; NEW_RBS-NEXT: s_mov_b32 s0, 1
+; NEW_RBS-NEXT: ; %bb.2: ; %exit
+; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s1
+; NEW_RBS-NEXT: v_mov_b32_e32 v0, s0
+; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
+; NEW_RBS-NEXT: s_endpgm
+A:
+  %cmp = icmp eq i32 %a, 0
+  br i1 %cmp, label %B, label %exit
+
+B:
+  br label %exit
+
+exit:
+  %phi = phi i32 [ 0, %A ], [ 1, %B ]
+  store i32 %phi, ptr addrspace(1) %out
+  ret void
+}
+
+; old RBS assigned vgpr to the uniform phi (because one input had an undetermined bank)
+; and it propagated to the mul, which was not wrong.
+; new RBS assigns vgpr to the destination of the mul even though both inputs are sgprs.
+; TODO: implement temporal divergence lowering
+define amdgpu_ps void @divergent_because_of_temporal_divergent_use(float %val, ptr addrspace(1) %addr) {
+; OLD_RBS-LABEL: divergent_because_of_temporal_divergent_use:
+; OLD_RBS: ; %bb.0: ; %entry
+; OLD_RBS-NEXT: s_mov_b32 s0, -1
+; OLD_RBS-NEXT: v_mov_b32_e32 v3, s0
+; OLD_RBS-NEXT: s_mov_b32 s0, 0
+; OLD_RBS-NEXT: .LBB15_1: ; %loop
+; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
+; OLD_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3
+; OLD_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3
+; OLD_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0
+; OLD_RBS-NEXT: s_or_b32 s0, vcc_lo, s0
+; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
+; OLD_RBS-NEXT: s_cbranch_execnz .LBB15_1
+; OLD_RBS-NEXT: ; %bb.2: ; %exit
+; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; OLD_RBS-NEXT: v_mul_lo_u32 v0, v3, 10
+; OLD_RBS-NEXT: global_store_dword v[1:2], v0, off
+; OLD_RBS-NEXT: s_endpgm
+;
+; NEW_RBS-LABEL: divergent_because_of_temporal_divergent_use:
+; NEW_RBS: ; %bb.0: ; %entry
+; NEW_RBS-NEXT: s_mov_b32 s0, -1
+; NEW_RBS-NEXT: v_mov_b32_e32 v3, s0
+; NEW_RBS-NEXT: s_mov_b32 s0, 0
+; NEW_RBS-NEXT: .LBB15_1: ; %loop
+; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1
+; NEW_RBS-NEXT: v_add_nc_u32_e32 v3, 1, v3
+; NEW_RBS-NEXT: v_cvt_f32_u32_e32 v4, v3
+; NEW_RBS-NEXT: v_cmp_gt_f32_e32 vcc_lo, v4, v0
+; NEW_RBS-NEXT: s_or_b32 s0, vcc_lo, s0
+; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0
+; NEW_RBS-NEXT: s_cbranch_execnz .LBB15_1
+; NEW_RBS-NEXT: ; %bb.2: ; %exit
+; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; NEW_RBS-NEXT: v_mul_lo_u32 v0, v3, 10
+; NEW_RBS-NEXT: global_store_dword v[1:2], v0, off
+; NEW_RBS-NEXT: s_endpgm
+entry:
+  br label %loop
+
+loop:
+  %counter = phi i32 [ 0, %entry ], [ %counter.plus.1, %loop ]
+  %f.counter = uitofp i32 %counter to float
+  %cond = fcmp ogt float %f.counter, %val
+  %counter.plus.1 = add i32 %counter, 1
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  %ceilx10 = mul i32 %counter, 10
+  store i32 %ceilx10, ptr addrspace(1) %addr
+  ret void
+}
+
+; Variables that handle the counter can be allocated to sgprs.
+; Machine uniformity analysis claims some of those registers are divergent while +; LLVM-IR uniformity analysis claims corresponding values are uniform. +; TODO: fix this in Machine uniformity analysis. +define amdgpu_cs void @loop_with_2breaks(ptr addrspace(1) %x, ptr addrspace(1) %a, ptr addrspace(1) %b) { +; OLD_RBS-LABEL: loop_with_2breaks: +; OLD_RBS: ; %bb.0: ; %entry +; OLD_RBS-NEXT: s_mov_b32 s0, 0 +; OLD_RBS-NEXT: ; implicit-def: $sgpr1 +; OLD_RBS-NEXT: v_mov_b32_e32 v6, s0 +; OLD_RBS-NEXT: s_branch .LBB16_3 +; OLD_RBS-NEXT: .LBB16_1: ; %Flow3 +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: s_waitcnt_depctr 0xffe3 +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3 +; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; OLD_RBS-NEXT: s_and_b32 s3, exec_lo, s4 +; OLD_RBS-NEXT: s_or_b32 s1, s1, s3 +; OLD_RBS-NEXT: .LBB16_2: ; %Flow +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, s1 +; OLD_RBS-NEXT: s_or_b32 s0, s2, s0 +; OLD_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_6 +; OLD_RBS-NEXT: .LBB16_3: ; %A +; OLD_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; OLD_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; OLD_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; OLD_RBS-NEXT: s_and_b32 s2, exec_lo, -1 +; OLD_RBS-NEXT: s_or_b32 s1, s1, s2 +; OLD_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] +; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo +; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; OLD_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_2 +; OLD_RBS-NEXT: ; %bb.4: ; %B +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo +; OLD_RBS-NEXT: s_mov_b32 s4, -1 +; OLD_RBS-NEXT: global_load_dword v9, v[9:10], off +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; OLD_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo +; OLD_RBS-NEXT: s_cbranch_execz .LBB16_1 +; OLD_RBS-NEXT: ; %bb.5: ; %loop.body +; OLD_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; OLD_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 +; OLD_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo +; OLD_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6 +; OLD_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 +; OLD_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo +; OLD_RBS-NEXT: global_load_dword v9, v[7:8], off +; OLD_RBS-NEXT: v_mov_b32_e32 v6, v10 +; OLD_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo +; OLD_RBS-NEXT: s_or_b32 s4, s4, s5 +; OLD_RBS-NEXT: s_waitcnt vmcnt(0) +; OLD_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9 +; OLD_RBS-NEXT: global_store_dword v[7:8], v9, off +; OLD_RBS-NEXT: s_branch .LBB16_1 +; OLD_RBS-NEXT: .LBB16_6: ; %exit +; OLD_RBS-NEXT: s_endpgm +; +; NEW_RBS-LABEL: loop_with_2breaks: +; NEW_RBS: ; %bb.0: ; %entry +; NEW_RBS-NEXT: s_mov_b32 s0, 0 +; NEW_RBS-NEXT: ; implicit-def: $sgpr1 +; NEW_RBS-NEXT: v_mov_b32_e32 v6, s0 +; NEW_RBS-NEXT: s_branch .LBB16_3 +; NEW_RBS-NEXT: .LBB16_1: ; %Flow3 +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: s_waitcnt_depctr 0xffe3 +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s3 +; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; NEW_RBS-NEXT: s_and_b32 s3, exec_lo, s4 +; NEW_RBS-NEXT: s_or_b32 s1, s1, s3 +; NEW_RBS-NEXT: .LBB16_2: ; %Flow +; NEW_RBS-NEXT: ; in Loop: 
Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: s_or_b32 exec_lo, exec_lo, s2 +; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, s1 +; NEW_RBS-NEXT: s_or_b32 s0, s2, s0 +; NEW_RBS-NEXT: s_andn2_b32 exec_lo, exec_lo, s0 +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_6 +; NEW_RBS-NEXT: .LBB16_3: ; %A +; NEW_RBS-NEXT: ; =>This Inner Loop Header: Depth=1 +; NEW_RBS-NEXT: v_ashrrev_i32_e32 v7, 31, v6 +; NEW_RBS-NEXT: s_andn2_b32 s1, s1, exec_lo +; NEW_RBS-NEXT: s_and_b32 s2, exec_lo, -1 +; NEW_RBS-NEXT: s_or_b32 s1, s1, s2 +; NEW_RBS-NEXT: v_lshlrev_b64 v[7:8], 2, v[6:7] +; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v2, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v3, v8, vcc_lo +; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; NEW_RBS-NEXT: s_and_saveexec_b32 s2, vcc_lo +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_2 +; NEW_RBS-NEXT: ; %bb.4: ; %B +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: v_add_co_u32 v9, vcc_lo, v4, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, v5, v8, vcc_lo +; NEW_RBS-NEXT: s_mov_b32 s4, -1 +; NEW_RBS-NEXT: global_load_dword v9, v[9:10], off +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v9 +; NEW_RBS-NEXT: s_and_saveexec_b32 s3, vcc_lo +; NEW_RBS-NEXT: s_cbranch_execz .LBB16_1 +; NEW_RBS-NEXT: ; %bb.5: ; %loop.body +; NEW_RBS-NEXT: ; in Loop: Header=BB16_3 Depth=1 +; NEW_RBS-NEXT: v_add_co_u32 v7, vcc_lo, v0, v7 +; NEW_RBS-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, v1, v8, vcc_lo +; NEW_RBS-NEXT: v_add_nc_u32_e32 v10, 1, v6 +; NEW_RBS-NEXT: v_cmp_gt_u32_e32 vcc_lo, 0x64, v6 +; NEW_RBS-NEXT: s_andn2_b32 s4, -1, exec_lo +; NEW_RBS-NEXT: global_load_dword v9, v[7:8], off +; NEW_RBS-NEXT: v_mov_b32_e32 v6, v10 +; NEW_RBS-NEXT: s_and_b32 s5, exec_lo, vcc_lo +; NEW_RBS-NEXT: s_or_b32 s4, s4, s5 +; NEW_RBS-NEXT: s_waitcnt vmcnt(0) +; NEW_RBS-NEXT: v_add_nc_u32_e32 v9, 1, v9 +; NEW_RBS-NEXT: global_store_dword v[7:8], v9, off +; NEW_RBS-NEXT: s_branch .LBB16_1 +; NEW_RBS-NEXT: .LBB16_6: ; %exit +; NEW_RBS-NEXT: s_endpgm +entry: + br label %A + +A: + %counter = phi i32 [ %counter.plus.1, %loop.body ], [ 0, %entry ] + %a.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %counter + %a.val = load i32, ptr addrspace(1) %a.plus.counter + %a.cond = icmp eq i32 %a.val, 0 + br i1 %a.cond, label %exit, label %B + +B: + %b.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %b, i32 %counter + %b.val = load i32, ptr addrspace(1) %b.plus.counter + %b.cond = icmp eq i32 %b.val, 0 + br i1 %b.cond, label %exit, label %loop.body + +loop.body: + %x.plus.counter = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %counter + %x.val = load i32, ptr addrspace(1) %x.plus.counter + %x.val.plus.1 = add i32 %x.val, 1 + store i32 %x.val.plus.1, ptr addrspace(1) %x.plus.counter + %counter.plus.1 = add i32 %counter, 1 + %x.cond = icmp ult i32 %counter, 100 + br i1 %x.cond, label %exit, label %A + +exit: + ret void +} + +declare i16 @llvm.abs.i16(i16, i1) +declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir new file mode 100644 index 00000000000000..d385225192dbb0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.mir @@ -0,0 +1,1377 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 
-run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=OLD_RBS +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s -check-prefixes=NEW_RBS + +--- +name: uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: uniform_in_vgpr + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY5]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: uniform_in_vgpr + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[COPY4]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY5]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_FPTOUI %0(s32) + %7:_(s32) = G_ADD %6, %1 + G_STORE %7(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
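+# back_to_back_uniform_in_vgpr: a chain of uniform G_FADD/G_FPTOUI/G_ADD executes
+# entirely on vgpr; each sgpr input is copied to vgpr once and the intermediate
+# results are never copied back to sgpr.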
+ +--- +name: back_to_back_uniform_in_vgpr +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: back_to_back_uniform_in_vgpr + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; OLD_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: back_to_back_uniform_in_vgpr + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[FADD:%[0-9]+]]:vgpr(s32) = G_FADD [[COPY5]], [[COPY6]] + ; NEW_RBS-NEXT: [[FPTOUI:%[0-9]+]]:vgpr(s32) = G_FPTOUI [[FADD]](s32) + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[FPTOUI]], [[COPY7]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FADD %0, %1 + %8:_(s32) = G_FPTOUI %7(s32) + %9:_(s32) = G_ADD %8, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
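+# buffer_load_uniform: even with all-uniform inputs, G_AMDGPU_BUFFER_LOAD defines
+# a vgpr result; the <4 x s32> descriptor stays on sgpr while the scalar offset
+# inputs that must be vector get sgpr-to-vgpr copies.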
+ +--- +name: buffer_load_uniform +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: buffer_load_uniform + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY9]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: buffer_load_uniform + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY9]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + 
%4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $sgpr4 + %7:_(s32) = COPY $vgpr0 + %8:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: buffer_load_divergent +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + + ; OLD_RBS-LABEL: name: buffer_load_divergent + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; OLD_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: buffer_load_divergent + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; NEW_RBS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable load (<4 x s32>), 
align 1, addrspace 8) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[UV1]], [[COPY8]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $sgpr0 + %4:_(s32) = COPY $sgpr1 + %5:_(s32) = COPY $sgpr2 + %6:_(s32) = COPY $sgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32), %6(s32) + %1:_(s32) = COPY $vgpr0 + %7:_(s32) = COPY $vgpr1 + %8:_(s32) = COPY $vgpr2 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %11:_(s32) = G_CONSTANT i32 0 + %10:_(<4 x s32>) = G_AMDGPU_BUFFER_LOAD %0(<4 x s32>), %11(s32), %1, %11, 0, 0, 0 :: (dereferenceable load (<4 x s32>), align 1, addrspace 8) + %13:_(s32) = G_CONSTANT i32 1 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %10(<4 x s32>) + %14:_(s32) = G_ADD %16, %13 + G_STORE %14(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: vgpr_and_i64 +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; OLD_RBS-LABEL: name: vgpr_and_i64 + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; OLD_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; OLD_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; OLD_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; OLD_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vgpr_and_i64 + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; NEW_RBS-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV]](s64) + ; NEW_RBS-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), 
[[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[MV1]](s64) + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; NEW_RBS-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + ; NEW_RBS-NEXT: G_STORE [[MV3]](s64), [[MV2]](p1) :: (store (s64), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %3(s32), %4(s32) + %5:_(s32) = COPY $vgpr2 + %6:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr4 + %8:_(s32) = COPY $vgpr5 + %2:_(p1) = G_MERGE_VALUES %7(s32), %8(s32) + %10:_(s64) = G_AND %0, %1 + G_STORE %10(s64), %2(p1) :: (store (s64), addrspace 1) + S_ENDPGM 0 +... + +--- +name: abs_sgpr_i16 +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: abs_sgpr_i16 + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) + ; OLD_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: abs_sgpr_i16 + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s16) + ; NEW_RBS-NEXT: [[ABS:%[0-9]+]]:sgpr(s32) = G_ABS [[SEXT]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ABS]](s32) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ANYEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s16), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %2:_(s32) = COPY $sgpr0 + %0:_(s16) = G_TRUNC %2(s32) + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s16) = G_ABS %0 + %7:_(s32) = G_ANYEXT %6(s16) + G_STORE %7(s32), %1(p1) :: (store (s16), addrspace 1) + S_ENDPGM 0 +... 
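+# uniform_i1_phi: a uniform s1 phi is widened; the incoming i1 values are
+# anyext-ed to sgpr s32, the G_PHI itself is s32, and the result is truncated
+# back to s1 before the sext. The s1 branch condition is likewise zext-ed to s32
+# to feed G_BRCOND.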
+ +--- +name: uniform_i1_phi +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: uniform_i1_phi + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; OLD_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: G_BRCOND [[ZEXT]](s32), %bb.2 + ; OLD_RBS-NEXT: G_BR %bb.1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 + ; OLD_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC3]](s1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: uniform_i1_phi + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x30000000), %bb.2(0x50000000) + ; NEW_RBS-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 6 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: G_BRCOND [[ZEXT]](s32), %bb.2 + ; NEW_RBS-NEXT: G_BR %bb.1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; 
NEW_RBS-NEXT: successors: %bb.2(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] + ; NEW_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP2]](s32) + ; NEW_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC2]](s1) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[ANYEXT]](s32), %bb.0, [[ANYEXT1]](s32), %bb.1 + ; NEW_RBS-NEXT: [[TRUNC3:%[0-9]+]]:sgpr(s1) = G_TRUNC [[PHI]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC3]](s1) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[SEXT]], [[C3]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x30000000), %bb.3(0x50000000) + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $sgpr1 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s1) = G_ICMP intpred(uge), %1(s32), %6 + %8:_(s32) = G_CONSTANT i32 0 + %9:_(s1) = G_ICMP intpred(ne), %2(s32), %8 + G_BRCOND %9(s1), %bb.3 + G_BR %bb.2 + + bb.2: + successors: %bb.3(0x80000000) + + %19:_(s32) = G_CONSTANT i32 1 + %11:_(s1) = G_ICMP intpred(ult), %1(s32), %19 + + bb.3: + %12:_(s1) = G_PHI %7(s1), %bb.1, %11(s1), %bb.2 + %17:_(s32) = G_SEXT %12(s1) + %18:_(s32) = G_CONSTANT i32 2 + %13:_(s32) = G_ADD %17, %18 + G_STORE %13(s32), %0(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: vcc_to_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: vcc_to_scc + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY7]], [[COPY8]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vcc_to_scc + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $sgpr2, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: 
[[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY5]](s32), [[COPY6]] + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY7]], [[COPY8]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_FCONSTANT float 0.000000e+00 + %8:_(s1) = G_FCMP floatpred(oeq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: scc_to_vcc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; OLD_RBS-LABEL: name: scc_to_vcc + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: scc_to_vcc + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s32) = G_CONSTANT i32 0 + %8:_(s1) = G_ICMP intpred(eq), %0(s32), %7 + %9:_(s32) = G_SELECT %8(s1), %1, %2 + G_STORE %9(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
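+# vgpr_to_vcc_trunc: an s1 produced by G_TRUNC of a divergent vgpr value stays
+# on vgpr and is then copied to vcc to act as the G_SELECT condition.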
+ +--- +name: vgpr_to_vcc_trunc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + + ; OLD_RBS-LABEL: name: vgpr_to_vcc_trunc + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: vgpr_to_vcc_trunc + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32) + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vcc(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[COPY5]](s1), [[COPY1]], [[COPY2]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(s32) = COPY $vgpr4 + %3:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %7:_(s1) = G_TRUNC %0(s32) + %8:_(s32) = G_SELECT %7(s1), %1, %2 + G_STORE %8(s32), %3(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
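+# zext: a uniform i1 condition stays scalar end to end: sgpr G_ICMP, s1 G_TRUNC
+# and s32 G_ZEXT, with a single copy to vgpr only for the global store.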
+ +--- +name: zext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: zext + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ZEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: zext + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[ZEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_ZEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
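+# sext: same pattern as zext above, but the scalar s1 is sign-extended with
+# G_SEXT before being copied to vgpr for the store.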
+ +--- +name: sext +legalized: true +body: | + bb.1: + liveins: $sgpr0, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: sext + ; OLD_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SEXT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: sext + ; NEW_RBS: liveins: $sgpr0, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[SEXT:%[0-9]+]]:sgpr(s32) = G_SEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[SEXT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 10 + %6:_(s1) = G_ICMP intpred(eq), %0(s32), %5 + %7:_(s32) = G_SEXT %6(s1) + G_STORE %7(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
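+# and_i1_vcc: with two divergent compares the lane masks stay in vcc and G_AND
+# operates on vcc s1 directly.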
+ +--- +name: and_i1_vcc +legalized: true +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; OLD_RBS-LABEL: name: and_i1_vcc + ; OLD_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: and_i1_vcc + ; NEW_RBS: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[COPY4]] + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:vcc(s1) = G_ICMP intpred(uge), [[COPY1]](s32), [[COPY5]] + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:vcc(s1) = G_AND [[ICMP]], [[ICMP1]] + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[AND]](s1), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
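+# and_i1_scc: uniform s1 operands are anyext-ed to s32 so that G_AND runs on
+# sgpr, then trunc + zext rebuild the s32 condition for the scalar G_SELECT.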
+ +--- +name: and_i1_scc +legalized: true +body: | + bb.1: + liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + + ; OLD_RBS-LABEL: name: and_i1_scc + ; OLD_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; OLD_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; OLD_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) + ; OLD_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC2]](s1) + ; OLD_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: and_i1_scc + ; NEW_RBS: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY]](s32), [[C]] + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 20 + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(uge), [[COPY1]](s32), [[C1]] + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[ICMP1]](s32) + ; NEW_RBS-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s1) + ; NEW_RBS-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; NEW_RBS-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s1) = G_TRUNC [[AND]](s32) + ; NEW_RBS-NEXT: [[ZEXT:%[0-9]+]]:sgpr(s32) = G_ZEXT [[TRUNC2]](s1) + ; NEW_RBS-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ZEXT]](s32), [[COPY]], [[COPY1]] + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = G_CONSTANT i32 10 + %7:_(s1) = G_ICMP intpred(uge), %0(s32), %6 + %8:_(s32) = G_CONSTANT i32 20 + %9:_(s1) = G_ICMP intpred(uge), %1(s32), %8 + %10:_(s1) = G_AND %7, %9 + %11:_(s32) = G_SELECT %10(s1), %0, %1 + G_STORE %11(s32), %2(p1) :: 
(store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: divergent_phi_with_uniform_inputs +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: divergent_phi_with_uniform_inputs + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[PHI]](s32) + ; OLD_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: divergent_phi_with_uniform_inputs + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[COPY3]] + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI [[C]](s32), %bb.0, [[C1]](s32), %bb.1 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[PHI]](s32) + ; NEW_RBS-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %5:_(s32) = G_CONSTANT i32 0 + %6:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), %0(s32), %5 + %8:sreg_32_xm0_xexec(s32) = SI_IF %6(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + 
successors: %bb.3(0x80000000) + + %13:_(s32) = G_CONSTANT i32 1 + + bb.3: + %9:_(s32) = G_PHI %5(s32), %bb.1, %13(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %8(s32) + G_STORE %9(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... + +--- +name: divergent_because_of_temporal_divergent_use +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: divergent_because_of_temporal_divergent_use + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI1]], [[COPY3]] + ; OLD_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[ADD]](s32) + ; OLD_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; OLD_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]] + ; OLD_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: divergent_because_of_temporal_divergent_use + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -1 + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sgpr(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.1 + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = 
COPY [[C2]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI1]], [[COPY3]] + ; NEW_RBS-NEXT: [[UITOFP:%[0-9]+]]:vgpr(s32) = G_UITOFP [[ADD]](s32) + ; NEW_RBS-NEXT: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) + ; NEW_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD]](s32), %bb.1 + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.1 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s32) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 10 + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; NEW_RBS-NEXT: [[MUL:%[0-9]+]]:vgpr(s32) = G_MUL [[PHI2]], [[COPY4]] + ; NEW_RBS-NEXT: G_STORE [[MUL]](s32), [[MV]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.1: + successors: %bb.2(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2 + + %0:_(s32) = COPY $vgpr0 + %2:_(s32) = COPY $vgpr1 + %3:_(s32) = COPY $vgpr2 + %1:_(p1) = G_MERGE_VALUES %2(s32), %3(s32) + %18:_(s32) = G_CONSTANT i32 -1 + %17:_(s32) = G_CONSTANT i32 0 + + bb.2: + successors: %bb.3(0x04000000), %bb.2(0x7c000000) + + %5:_(s32) = G_PHI %11(s32), %bb.2, %17(s32), %bb.1 + %6:_(s32) = G_PHI %18(s32), %bb.1, %8(s32), %bb.2 + %22:_(s32) = G_CONSTANT i32 1 + %8:_(s32) = G_ADD %6, %22 + %9:_(s32) = G_UITOFP %8(s32) + %10:_(s1) = G_FCMP floatpred(ogt), %9(s32), %0 + %11:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %10(s1), %5(s32) + SI_LOOP %11(s32), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.3 + + bb.3: + %13:_(s32) = G_PHI %8(s32), %bb.2 + %14:_(s32) = G_PHI %11(s32), %bb.2 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %14(s32) + %21:_(s32) = G_CONSTANT i32 10 + %16:_(s32) = G_MUL %13, %21 + G_STORE %16(s32), %1(p1) :: (store (s32), addrspace 1) + S_ENDPGM 0 +... 
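+# loop_with_2breaks: divergent i1 phis arrive as sreg_32 lane-mask PHIs updated
+# with S_ANDN2_B32/S_AND_B32/S_OR_B32; regbankselect leaves those and the
+# SI_IF/SI_LOOP control-flow machinery in place while assigning banks to the
+# rest of the loop.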
+ +--- +name: loop_with_2breaks +legalized: true +tracksRegLiveness: true +body: | + ; OLD_RBS-LABEL: name: loop_with_2breaks + ; OLD_RBS: bb.0: + ; OLD_RBS-NEXT: successors: %bb.1(0x80000000) + ; OLD_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; OLD_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; OLD_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; OLD_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; OLD_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; OLD_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; OLD_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; OLD_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; OLD_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; OLD_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF + ; OLD_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.1: + ; OLD_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; OLD_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; OLD_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; OLD_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; OLD_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[PHI2]](s32) + ; OLD_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 + ; OLD_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY7]], [[C1]](s32) + ; OLD_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY7]](s32), [[ASHR]](s32) + ; OLD_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; OLD_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY8]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; OLD_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; OLD_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY9]] + ; OLD_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C4]](s32) + ; OLD_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC]](s1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; OLD_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.2 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.2: + ; OLD_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C5]](s32) + ; 
OLD_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY12]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; OLD_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; OLD_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) + ; OLD_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY13]] + ; OLD_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C7]](s32) + ; OLD_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC1]](s1) + ; OLD_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; OLD_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.4 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.3: + ; OLD_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; OLD_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; OLD_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; OLD_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI1]](s32) + ; OLD_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; OLD_RBS-NEXT: G_BR %bb.6 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.4: + ; OLD_RBS-NEXT: successors: %bb.5(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; OLD_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[C8]](s32) + ; OLD_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY17]](s32) + ; OLD_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; OLD_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; OLD_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; OLD_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; OLD_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY18]] + ; OLD_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; OLD_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; OLD_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI2]], [[COPY19]] + ; OLD_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 + ; OLD_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C10]](s32) + ; OLD_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY20]] + ; OLD_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.5: + ; OLD_RBS-NEXT: successors: %bb.3(0x80000000) + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; OLD_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(s32) = G_PHI 
[[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; OLD_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; OLD_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1) + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; OLD_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; OLD_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY23]](s1), implicit-def $scc + ; OLD_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; OLD_RBS-NEXT: G_BR %bb.3 + ; OLD_RBS-NEXT: {{ $}} + ; OLD_RBS-NEXT: bb.6: + ; OLD_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 + ; OLD_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; OLD_RBS-NEXT: S_ENDPGM 0 + ; + ; NEW_RBS-LABEL: name: loop_with_2breaks + ; NEW_RBS: bb.0: + ; NEW_RBS-NEXT: successors: %bb.1(0x80000000) + ; NEW_RBS-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; NEW_RBS-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; NEW_RBS-NEXT: [[MV:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; NEW_RBS-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; NEW_RBS-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; NEW_RBS-NEXT: [[MV1:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; NEW_RBS-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; NEW_RBS-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5 + ; NEW_RBS-NEXT: [[MV2:%[0-9]+]]:vgpr(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; NEW_RBS-NEXT: [[DEF:%[0-9]+]]:sgpr(s32) = G_IMPLICIT_DEF + ; NEW_RBS-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.1: + ; NEW_RBS-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = PHI [[DEF1]](s1), %bb.0, %13(s1), %bb.3 + ; NEW_RBS-NEXT: [[PHI1:%[0-9]+]]:sgpr(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; NEW_RBS-NEXT: [[PHI2:%[0-9]+]]:vgpr(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 + ; NEW_RBS-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]](s1) + ; NEW_RBS-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[PHI2]](s32) + ; NEW_RBS-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31 + ; NEW_RBS-NEXT: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY7]], [[C1]](s32) + ; NEW_RBS-NEXT: [[MV3:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY7]](s32), [[ASHR]](s32) + ; NEW_RBS-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[C2]](s32) + ; NEW_RBS-NEXT: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY8]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) + ; NEW_RBS-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[C3]](s32) + ; NEW_RBS-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[COPY9]] + ; NEW_RBS-NEXT: [[C4:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C4]](s32) + ; NEW_RBS-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC]](s1) + ; NEW_RBS-NEXT: 
[[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) + ; NEW_RBS-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.2 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.2: + ; NEW_RBS-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C5:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[C5]](s32) + ; NEW_RBS-NEXT: [[SHL1:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY12]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) + ; NEW_RBS-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C6:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; NEW_RBS-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[C6]](s32) + ; NEW_RBS-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD1]](s32), [[COPY13]] + ; NEW_RBS-NEXT: [[C7:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s1) = G_TRUNC [[C7]](s32) + ; NEW_RBS-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[TRUNC1]](s1) + ; NEW_RBS-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; NEW_RBS-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.4 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.3: + ; NEW_RBS-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = PHI [[S_OR_B32_]](s1), %bb.1, %43(s1), %bb.5 + ; NEW_RBS-NEXT: [[PHI4:%[0-9]+]]:vgpr(s32) = G_PHI %44(s32), %bb.5, [[DEF]](s32), %bb.1 + ; NEW_RBS-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) + ; NEW_RBS-NEXT: [[INT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI1]](s32) + ; NEW_RBS-NEXT: SI_LOOP [[INT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + ; NEW_RBS-NEXT: G_BR %bb.6 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.4: + ; NEW_RBS-NEXT: successors: %bb.5(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[C8:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 2 + ; NEW_RBS-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[C8]](s32) + ; NEW_RBS-NEXT: [[SHL2:%[0-9]+]]:vgpr(s64) = G_SHL [[MV3]], [[COPY17]](s32) + ; NEW_RBS-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[MV]], [[SHL2]](s64) + ; NEW_RBS-NEXT: [[LOAD2:%[0-9]+]]:vgpr(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load (s32), addrspace 1) + ; NEW_RBS-NEXT: [[C9:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; NEW_RBS-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; NEW_RBS-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[LOAD2]], [[COPY18]] + ; NEW_RBS-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) + ; NEW_RBS-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[C9]](s32) + ; NEW_RBS-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[PHI2]], [[COPY19]] + ; NEW_RBS-NEXT: [[C10:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 100 + ; 
NEW_RBS-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[C10]](s32) + ; NEW_RBS-NEXT: [[ICMP2:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[COPY20]] + ; NEW_RBS-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY15]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.5: + ; NEW_RBS-NEXT: successors: %bb.3(0x80000000) + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: [[PHI5:%[0-9]+]]:sreg_32(s1) = PHI [[COPY14]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 + ; NEW_RBS-NEXT: [[PHI6:%[0-9]+]]:vgpr(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; NEW_RBS-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]](s1) + ; NEW_RBS-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[COPY22]](s1) + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) + ; NEW_RBS-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc + ; NEW_RBS-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY23]](s1), implicit-def $scc + ; NEW_RBS-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; NEW_RBS-NEXT: G_BR %bb.3 + ; NEW_RBS-NEXT: {{ $}} + ; NEW_RBS-NEXT: bb.6: + ; NEW_RBS-NEXT: [[PHI7:%[0-9]+]]:sgpr(s32) = G_PHI [[INT]](s32), %bb.3 + ; NEW_RBS-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; NEW_RBS-NEXT: S_ENDPGM 0 + bb.0: + successors: %bb.1(0x80000000) + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(p1) = G_MERGE_VALUES %0(s32), %1(s32) + %3:_(s32) = COPY $vgpr2 + %4:_(s32) = COPY $vgpr3 + %5:_(p1) = G_MERGE_VALUES %3(s32), %4(s32) + %6:_(s32) = COPY $vgpr4 + %7:_(s32) = COPY $vgpr5 + %8:_(p1) = G_MERGE_VALUES %6(s32), %7(s32) + %9:_(s32) = G_IMPLICIT_DEF + %10:_(s32) = G_CONSTANT i32 0 + %11:sreg_32(s1) = IMPLICIT_DEF + + bb.1: + successors: %bb.2(0x40000000), %bb.3(0x40000000) + + %12:sreg_32(s1) = PHI %11(s1), %bb.0, %13(s1), %bb.3 + %14:_(s32) = G_PHI %15(s32), %bb.3, %10(s32), %bb.0 + %16:_(s32) = G_PHI %10(s32), %bb.0, %17(s32), %bb.3 + %18:sreg_32(s1) = COPY %12(s1) + %19:_(s64) = G_SEXT %16(s32) + %20:_(s32) = G_CONSTANT i32 2 + %21:_(s64) = G_SHL %19, %20(s32) + %22:_(p1) = G_PTR_ADD %5, %21(s64) + %23:_(s32) = G_LOAD %22(p1) :: (load (s32), addrspace 1) + %24:_(s32) = G_CONSTANT i32 0 + %25:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %23(s32), %24 + %26:_(s1) = G_CONSTANT i1 true + %27:sreg_32(s1) = COPY %26(s1) + %28:sreg_32(s1) = S_ANDN2_B32 %18(s1), $exec_lo, implicit-def $scc + %29:sreg_32(s1) = S_AND_B32 $exec_lo, %27(s1), implicit-def $scc + %30:sreg_32(s1) = S_OR_B32 %28(s1), %29(s1), implicit-def $scc + %31:sreg_32(s1) = COPY %30(s1) + %32:sreg_32_xm0_xexec(s32) = SI_IF %25(s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.2 + + bb.2: + successors: %bb.4(0x40000000), %bb.5(0x40000000) + + %33:_(s32) = G_CONSTANT i32 2 + %34:_(s64) = G_SHL %19, %33(s32) + %35:_(p1) = G_PTR_ADD %8, %34(s64) + %36:_(s32) = G_LOAD %35(p1) :: (load (s32), addrspace 1) + %37:_(s32) = G_CONSTANT i32 0 + %38:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), %36(s32), %37 + %39:_(s1) = G_CONSTANT i1 
true + %40:sreg_32(s1) = COPY %39(s1) + %41:sreg_32(s1) = COPY %40(s1) + %42:sreg_32_xm0_xexec(s32) = SI_IF %38(s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.4 + + bb.3: + successors: %bb.6(0x04000000), %bb.1(0x7c000000) + + %13:sreg_32(s1) = PHI %30(s1), %bb.1, %43(s1), %bb.5 + %17:_(s32) = G_PHI %44(s32), %bb.5, %9(s32), %bb.1 + %45:sreg_32(s1) = COPY %13(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %32(s32) + %15:sreg_32_xm0_xexec(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), %45(s1), %14(s32) + SI_LOOP %15(s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec + G_BR %bb.6 + + bb.4: + successors: %bb.5(0x80000000) + + %46:_(s32) = G_CONSTANT i32 2 + %47:_(s64) = G_SHL %19, %46(s32) + %48:_(p1) = G_PTR_ADD %2, %47(s64) + %49:_(s32) = G_LOAD %48(p1) :: (load (s32), addrspace 1) + %50:_(s32) = G_CONSTANT i32 1 + %51:_(s32) = G_ADD %49, %50 + G_STORE %51(s32), %48(p1) :: (store (s32), addrspace 1) + %52:_(s32) = G_ADD %16, %50 + %53:_(s32) = G_CONSTANT i32 100 + %54:_(s1) = G_ICMP intpred(ult), %16(s32), %53 + %55:sreg_32(s1) = COPY %54(s1) + %56:sreg_32(s1) = S_ANDN2_B32 %41(s1), $exec_lo, implicit-def $scc + %57:sreg_32(s1) = S_AND_B32 $exec_lo, %55(s1), implicit-def $scc + %58:sreg_32(s1) = S_OR_B32 %56(s1), %57(s1), implicit-def $scc + + bb.5: + successors: %bb.3(0x80000000) + + %59:sreg_32(s1) = PHI %40(s1), %bb.2, %58(s1), %bb.4 + %44:_(s32) = G_PHI %52(s32), %bb.4, %9(s32), %bb.2 + %60:sreg_32(s1) = COPY %59(s1) + %61:sreg_32(s1) = COPY %60(s1) + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %42(s32) + %62:sreg_32(s1) = S_ANDN2_B32 %31(s1), $exec_lo, implicit-def $scc + %63:sreg_32(s1) = S_AND_B32 $exec_lo, %61(s1), implicit-def $scc + %43:sreg_32(s1) = S_OR_B32 %62(s1), %63(s1), implicit-def $scc + G_BR %bb.3 + + bb.6: + %64:_(s32) = G_PHI %15(s32), %bb.3 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %64(s32) + S_ENDPGM 0 +...
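+# NOTE: for loop_with_2breaks the OLD_RBS and NEW_RBS check lines above are
+# line-for-line identical: uniform s32 values (loop counters, constants) stay
+# in sgpr, divergent s32/s64/p1 values go to vgpr, and the divergent s1 lane
+# masks carried across blocks are kept in sreg_32 / sreg_32_xm0_xexec and
+# merged with the S_ANDN2_B32 / S_AND_B32 / S_OR_B32 sequence under $exec_lo.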