Skip to content

Commit

Permalink
[AMDGPU] Remove SIWholeQuadMode pass early exit (#98450)
Browse files Browse the repository at this point in the history
Merge the code bypass elements from the early exit into the main pass
execution flow.
  • Loading branch information
perlfu authored Jul 17, 2024
1 parent 0b71d80 commit 8d28a41
Showing 1 changed file with 38 additions and 25 deletions.
63 changes: 38 additions & 25 deletions llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,11 +219,12 @@ class SIWholeQuadMode : public MachineFunctionPass {
void lowerBlock(MachineBasicBlock &MBB);
void processBlock(MachineBasicBlock &MBB, bool IsEntry);

void lowerLiveMaskQueries();
void lowerCopyInstrs();
void lowerKillInstrs(bool IsWQM);
bool lowerLiveMaskQueries();
bool lowerCopyInstrs();
bool lowerKillInstrs(bool IsWQM);
void lowerInitExec(MachineInstr &MI);
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry);
MachineBasicBlock::iterator lowerInitExecInstrs(MachineBasicBlock &Entry,
bool &Changed);

public:
static char ID;
Expand Down Expand Up @@ -796,6 +797,8 @@ MachineBasicBlock *SIWholeQuadMode::splitBlock(MachineBasicBlock *BB,

MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
MachineInstr &MI) {
assert(LiveMaskReg.isVirtual());

const DebugLoc &DL = MI.getDebugLoc();
unsigned Opcode = 0;

Expand Down Expand Up @@ -913,6 +916,8 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,

MachineInstr *SIWholeQuadMode::lowerKillI1(MachineBasicBlock &MBB,
MachineInstr &MI, bool IsWQM) {
assert(LiveMaskReg.isVirtual());

const DebugLoc &DL = MI.getDebugLoc();
MachineInstr *MaskUpdateMI = nullptr;

Expand Down Expand Up @@ -1144,6 +1149,8 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,
Register SaveWQM) {
assert(LiveMaskReg.isVirtual());

bool IsTerminator = Before == MBB.end();
if (!IsTerminator) {
auto FirstTerm = MBB.getFirstTerminator();
Expand Down Expand Up @@ -1423,7 +1430,7 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
assert(!SavedNonStrictReg);
}

void SIWholeQuadMode::lowerLiveMaskQueries() {
bool SIWholeQuadMode::lowerLiveMaskQueries() {
for (MachineInstr *MI : LiveMaskQueries) {
const DebugLoc &DL = MI->getDebugLoc();
Register Dest = MI->getOperand(0).getReg();
Expand All @@ -1435,9 +1442,10 @@ void SIWholeQuadMode::lowerLiveMaskQueries() {
LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
MI->eraseFromParent();
}
return !LiveMaskQueries.empty();
}

void SIWholeQuadMode::lowerCopyInstrs() {
bool SIWholeQuadMode::lowerCopyInstrs() {
for (MachineInstr *MI : LowerToMovInstrs) {
assert(MI->getNumExplicitOperands() == 2);

Expand Down Expand Up @@ -1492,9 +1500,10 @@ void SIWholeQuadMode::lowerCopyInstrs() {
*MRI, MI->getOperand(0)));
MI->setDesc(TII->get(CopyOp));
}
return !LowerToCopyInstrs.empty() || !LowerToMovInstrs.empty();
}

void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
bool SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
for (MachineInstr *MI : KillInstrs) {
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *SplitPoint = nullptr;
Expand All @@ -1510,6 +1519,7 @@ void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
if (SplitPoint)
splitBlock(MBB, SplitPoint);
}
return !KillInstrs.empty();
}

void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
Expand Down Expand Up @@ -1601,7 +1611,7 @@ void SIWholeQuadMode::lowerInitExec(MachineInstr &MI) {
/// Lower INIT_EXEC instructions. Return a suitable insert point in \p Entry
/// for instructions that depend on EXEC.
MachineBasicBlock::iterator
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry, bool &Changed) {
MachineBasicBlock::iterator InsertPt = Entry.getFirstNonPHI();

for (MachineInstr *MI : InitExecInstrs) {
Expand All @@ -1612,6 +1622,7 @@ SIWholeQuadMode::lowerInitExecInstrs(MachineBasicBlock &Entry) {
InsertPt = std::next(MI->getIterator());

lowerInitExec(*MI);
Changed = true;
}

return InsertPt;
Expand Down Expand Up @@ -1664,48 +1675,50 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
}

const char GlobalFlags = analyzeFunction(MF);
const bool NeedsLiveMask = !(KillInstrs.empty() && LiveMaskQueries.empty());
bool Changed = false;

LiveMaskReg = Exec;

MachineBasicBlock &Entry = MF.front();
MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry);

// Shader is simple: it does not need any state changes or any complex lowering
if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
LowerToMovInstrs.empty() && KillInstrs.empty()) {
lowerLiveMaskQueries();
if (!InitExecInstrs.empty())
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);
return !InitExecInstrs.empty() || !LiveMaskQueries.empty();
}
MachineBasicBlock::iterator EntryMI = lowerInitExecInstrs(Entry, Changed);

// Store a copy of the original live mask when required
if (NeedsLiveMask || (GlobalFlags & StateWQM)) {
const bool HasLiveMaskQueries = !LiveMaskQueries.empty();
const bool HasWaveModes = GlobalFlags & ~StateExact;
const bool HasKills = !KillInstrs.empty();
const bool UsesWQM = GlobalFlags & StateWQM;
if (HasKills || UsesWQM || (HasWaveModes && HasLiveMaskQueries)) {
LiveMaskReg = MRI->createVirtualRegister(TRI->getBoolRC());
MachineInstr *MI =
BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
.addReg(Exec);
LIS->InsertMachineInstrInMaps(*MI);
Changed = true;
}

LLVM_DEBUG(printInfo());

lowerLiveMaskQueries();
lowerCopyInstrs();
Changed |= lowerLiveMaskQueries();
Changed |= lowerCopyInstrs();

// Shader only needs WQM
if (GlobalFlags == StateWQM) {
if (!HasWaveModes) {
// No wave mode execution
Changed |= lowerKillInstrs(false);
} else if (GlobalFlags == StateWQM) {
// Shader only needs WQM
auto MI = BuildMI(Entry, EntryMI, DebugLoc(), TII->get(WQMOpc), Exec)
.addReg(Exec);
LIS->InsertMachineInstrInMaps(*MI);
lowerKillInstrs(true);
Changed = true;
} else {
// Wave mode switching requires full lowering pass.
for (auto BII : Blocks)
processBlock(*BII.first, BII.first == &Entry);
// Lowering blocks causes block splitting, so perform it as a second pass.
for (auto BII : Blocks)
lowerBlock(*BII.first);
Changed = true;
}

// Compute live range for live mask
Expand All @@ -1721,5 +1734,5 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (!KillInstrs.empty() || !InitExecInstrs.empty())
LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);

return true;
return Changed;
}

0 comments on commit 8d28a41

Please sign in to comment.