Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Add skeletons for new register bank select passes
Browse files Browse the repository at this point in the history
New register bank select for AMDGPU will be split in two passes:
- AMDGPURegBankSelect: select banks based on machine uniformity analysis
- AMDGPURegBankLegalize: lower instructions that can't be inst-selected
  with register banks assigned by AMDGPURegBankSelect.
AMDGPURegBankLegalize is similar to legalizer but with context of
uniformity analysis. Does not change already assigned banks.
Main goal of AMDGPURegBankLegalize is to provide high level table-like
overview of how to lower generic instructions based on available target
features and uniformity info (uniform vs divergent).
See RegBankLegalizeRules.

Summary of new features:
At the moment register bank select assigns register bank to output
register using simple algorithm:
- one of the inputs is vgpr output is vgpr
- all inputs are sgpr output is sgpr.
When function does not contain divergent control flow propagating
register banks like this works. In general, first point is still correct
but second is not when function contains divergent control flow.
Examples:
- Phi with uniform inputs that go through divergent branch
- Instruction with temporal divergent use.
To fix this AMDGPURegBankSelect will use machine uniformity analysis
to assign vgpr to each divergent and sgpr to each uniform instruction.
But some instructions are only available on VALU (for example floating
point instructions before gfx1150) and we need to assign vgpr to them.
Since we are no longer propagating register banks we need to ensure that
uniform instructions get their inputs in sgpr in some way.
In AMDGPURegBankLegalize uniform instructions that are only available on
VALU will be reassigned to vgpr on all operands and read-any-lane vgpr
output to original sgpr output.
  • Loading branch information
petar-avramovic committed Oct 28, 2024
1 parent f6c230d commit 07055a7
Show file tree
Hide file tree
Showing 11 changed files with 4,048 additions and 2 deletions.
10 changes: 9 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,16 @@ void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createAMDGPURegBankSelectPass();
FunctionPass *createAMDGPURegBankLegalizePass();

// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowLegacyPass();
FunctionPass *createSIFoldOperandsLegacyPass();
FunctionPass *createSIPeepholeSDWALegacyPass();
FunctionPass *createSILowerI1CopiesLegacyPass();
FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
FunctionPass *createSIShrinkInstructionsLegacyPass();
FunctionPass *createSILoadStoreOptimizerLegacyPass();
FunctionPass *createSIWholeQuadModePass();
Expand Down Expand Up @@ -186,6 +188,12 @@ extern char &SILowerI1CopiesLegacyID;
void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
extern char &AMDGPUGlobalISelDivergenceLoweringID;

void initializeAMDGPURegBankSelectPass(PassRegistry &);
extern char &AMDGPURegBankSelectID;

void initializeAMDGPURegBankLegalizePass(PassRegistry &);
extern char &AMDGPURegBankLegalizeID;

void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
extern char &AMDGPUMarkLastScratchLoadID;

Expand Down
79 changes: 79 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//===-- AMDGPURegBankLegalize.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Lower G_ instructions that can't be inst-selected with register bank
/// assignment from AMDGPURegBankSelect based on machine uniformity info.
/// Given types on all operands, some register bank assignments require lowering
/// while others do not.
/// Note: cases where all register bank assignments would require lowering are
/// lowered in legalizer.
/// For example vgpr S64 G_AND requires lowering to S32 while sgpr S64 does not.
/// Eliminate sgpr S1 by lowering to sgpr S32.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;

namespace {

class AMDGPURegBankLegalize : public MachineFunctionPass {
public:
static char ID;

public:
AMDGPURegBankLegalize() : MachineFunctionPass(ID) {
initializeAMDGPURegBankLegalizePass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
return "AMDGPU Register Bank Legalize";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}

// If there were no phis and we do waterfall expansion machine verifier would
// fail.
MachineFunctionProperties getClearedProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoPHIs);
}
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE,
"AMDGPU Register Bank Legalize", false, false)
INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE,
"AMDGPU Register Bank Legalize", false, false)

char AMDGPURegBankLegalize::ID = 0;

char &llvm::AMDGPURegBankLegalizeID = AMDGPURegBankLegalize::ID;

FunctionPass *llvm::createAMDGPURegBankLegalizePass() {
return new AMDGPURegBankLegalize();
}

using namespace AMDGPU;

bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
return true;
}
74 changes: 74 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// SGPR - uniform values and some lane masks
/// VGPR - divergent, non S1, values
/// VCC - divergent S1 values(lane masks)
/// However in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-regbankselect"

using namespace llvm;

namespace {

class AMDGPURegBankSelect : public MachineFunctionPass {
public:
static char ID;

AMDGPURegBankSelect() : MachineFunctionPass(ID) {
initializeAMDGPURegBankSelectPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;

StringRef getPassName() const override {
return "AMDGPU Register Bank Select";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}

// This pass assigns register banks to all virtual registers, and we maintain
// this property in subsequent passes
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::RegBankSelected);
}
};

} // End anonymous namespace.

INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
"AMDGPU Register Bank Select", false, false)

char AMDGPURegBankSelect::ID = 0;

char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;

FunctionPass *llvm::createAMDGPURegBankSelectPass() {
return new AMDGPURegBankSelect();
}

bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
if (MF.getProperties().hasProperty(
MachineFunctionProperties::Property::FailedISel))
return false;
return true;
}
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,12 @@ static cl::opt<bool>
cl::desc("Enable AMDGPUAttributorPass"),
cl::init(true), cl::Hidden);

static cl::opt<bool> NewRegBankSelect(
"new-reg-bank-select",
cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of "
"regbankselect"),
cl::init(false), cl::Hidden);

extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
Expand All @@ -459,6 +465,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeGCNDPPCombineLegacyPass(*PR);
initializeSILowerI1CopiesLegacyPass(*PR);
initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR);
initializeAMDGPURegBankSelectPass(*PR);
initializeAMDGPURegBankLegalizePass(*PR);
initializeSILowerWWMCopiesPass(*PR);
initializeAMDGPUMarkLastScratchLoadPass(*PR);
initializeSILowerSGPRSpillsLegacyPass(*PR);
Expand Down Expand Up @@ -1370,7 +1378,12 @@ void GCNPassConfig::addPreRegBankSelect() {
}

bool GCNPassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
if (NewRegBankSelect) {
addPass(createAMDGPURegBankSelectPass());
addPass(createAMDGPURegBankLegalizePass());
} else {
addPass(new RegBankSelect());
}
return false;
}

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUPromoteAlloca.cpp
AMDGPUPromoteKernelArguments.cpp
AMDGPURegBankCombiner.cpp
AMDGPURegBankLegalize.cpp
AMDGPURegBankSelect.cpp
AMDGPURegisterBankInfo.cpp
AMDGPURemoveIncompatibleFunctions.cpp
AMDGPUReserveWWMRegs.cpp
Expand Down
Loading

0 comments on commit 07055a7

Please sign in to comment.