Skip to content

Commit

Permalink
[AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot. NFCI. (#99345)
Browse files Browse the repository at this point in the history

Summary: 

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251064
  • Loading branch information
jayfoad authored and yuxuanchen1997 committed Jul 25, 2024
1 parent e827ce3 commit 3629ff8
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 46 deletions.
12 changes: 0 additions & 12 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2775,18 +2775,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
return;
case Intrinsic::amdgcn_inverse_ballot:
switch (N->getOperand(1).getValueSizeInBits()) {
case 32:
Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
break;
case 64:
Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
break;
default:
llvm_unreachable("Unsupported size for inverse ballot mask.");
}
break;
default:
SelectCode(N);
break;
Expand Down
13 changes: 0 additions & 13 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1055,8 +1055,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectIntrinsicCmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
case Intrinsic::amdgcn_inverse_ballot:
return selectInverseBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::amdgcn_groupstaticsize:
Expand Down Expand Up @@ -1449,17 +1447,6 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return true;
}

/// Select an inverse-ballot intrinsic by emitting a plain COPY of its mask
/// operand into the result register, then deleting the intrinsic instruction.
///
/// \param I the intrinsic instruction being selected; it is erased on return.
/// \returns true unconditionally (this routine has no failure path).
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  // Operand 0 is the result register. The mask is read from operand 2
  // (operand 1 is presumably the intrinsic ID -- confirm against the
  // G_INTRINSIC operand layout).
  const Register Result = I.getOperand(0).getReg();
  const Register Mask = I.getOperand(2).getReg();

  // Insert the COPY immediately before I, reusing its debug location.
  MachineBasicBlock &Block = *I.getParent();
  BuildMI(Block, &I, I.getDebugLoc(), TII.get(AMDGPU::COPY), Result)
      .addReg(Mask);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicCmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
bool selectInverseBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
bool selectGroupStaticSize(MachineInstr &I) const;
bool selectReturnAddress(MachineInstr &I) const;
Expand Down
21 changes: 4 additions & 17 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5480,24 +5480,11 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
case AMDGPU::S_INVERSE_BALLOT_U32:
case AMDGPU::S_INVERSE_BALLOT_U64: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const DebugLoc &DL = MI.getDebugLoc();
const Register DstReg = MI.getOperand(0).getReg();
Register MaskReg = MI.getOperand(1).getReg();

const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);

if (IsVALU) {
MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
}

BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
MI.eraseFromParent();
case AMDGPU::S_INVERSE_BALLOT_U64:
// These opcodes only exist to let SIFixSGPRCopies insert a readfirstlane if
// necessary. After that they are equivalent to a COPY.
MI.setDesc(TII->get(AMDGPU::COPY));
return BB;
}
case AMDGPU::ENDPGM_TRAP: {
const DebugLoc &DL = MI.getDebugLoc();
if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6686,7 +6686,9 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
MI.getOpcode() == AMDGPU::S_WQM_B32 ||
MI.getOpcode() == AMDGPU::S_WQM_B64) {
MI.getOpcode() == AMDGPU::S_WQM_B64 ||
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,15 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
}

let usesCustomInserter = 1 in {
def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
(outs SReg_32:$sdst), (ins SSrc_b32:$mask),
[(set i1:$sdst, (int_amdgcn_inverse_ballot i32:$mask))]
>;

def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
(outs SReg_64:$sdst), (ins SSrc_b64:$mask),
[(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
>;
} // End usesCustomInserter = 1

// Pseudo instructions used for @llvm.fptrunc.round upward
Expand Down

0 comments on commit 3629ff8

Please sign in to comment.