Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot. NFCI. #99345

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 0 additions & 12 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2775,18 +2775,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
return;
case Intrinsic::amdgcn_inverse_ballot:
switch (N->getOperand(1).getValueSizeInBits()) {
case 32:
Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
break;
case 64:
Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
break;
default:
llvm_unreachable("Unsupported size for inverse ballot mask.");
}
break;
default:
SelectCode(N);
break;
Expand Down
13 changes: 0 additions & 13 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1055,8 +1055,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectIntrinsicCmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
case Intrinsic::amdgcn_inverse_ballot:
return selectInverseBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::amdgcn_groupstaticsize:
Expand Down Expand Up @@ -1449,17 +1447,6 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return true;
}

/// Select llvm.amdgcn.inverse.ballot by emitting a plain COPY of the mask
/// operand (operand 2) into the result register (operand 0), inserted
/// immediately before \p I. The intrinsic instruction \p I is then erased.
///
/// \returns true unconditionally.
bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
  // Read everything needed from I up front; it is erased below.
  const Register ResultReg = I.getOperand(0).getReg();
  const Register SrcMaskReg = I.getOperand(2).getReg();
  const DebugLoc &Loc = I.getDebugLoc();
  MachineBasicBlock &ParentMBB = *I.getParent();

  BuildMI(ParentMBB, &I, Loc, TII.get(AMDGPU::COPY), ResultReg)
      .addReg(SrcMaskReg);

  I.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicCmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
bool selectInverseBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
bool selectGroupStaticSize(MachineInstr &I) const;
bool selectReturnAddress(MachineInstr &I) const;
Expand Down
21 changes: 4 additions & 17 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5479,24 +5479,11 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
case AMDGPU::S_INVERSE_BALLOT_U32:
case AMDGPU::S_INVERSE_BALLOT_U64: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const DebugLoc &DL = MI.getDebugLoc();
const Register DstReg = MI.getOperand(0).getReg();
Register MaskReg = MI.getOperand(1).getReg();

const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);

if (IsVALU) {
MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
}

BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
MI.eraseFromParent();
case AMDGPU::S_INVERSE_BALLOT_U64:
// These opcodes only exist to let SIFixSGPRCopies insert a readfirstlane if
// necessary. After that they are equivalent to a COPY.
MI.setDesc(TII->get(AMDGPU::COPY));
return BB;
}
case AMDGPU::ENDPGM_TRAP: {
const DebugLoc &DL = MI.getDebugLoc();
if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6687,7 +6687,9 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
MI.getOpcode() == AMDGPU::S_WQM_B32 ||
MI.getOpcode() == AMDGPU::S_WQM_B64) {
MI.getOpcode() == AMDGPU::S_WQM_B64 ||
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
Expand Down
10 changes: 8 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,15 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
}

let usesCustomInserter = 1 in {
def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
(outs SReg_32:$sdst), (ins SSrc_b32:$mask),
[(set i1:$sdst, (int_amdgcn_inverse_ballot i32:$mask))]
>;

def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
(outs SReg_64:$sdst), (ins SSrc_b64:$mask),
[(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
>;
} // End usesCustomInserter = 1

// Pseudo instructions used for @llvm.fptrunc.round upward
Expand Down
Loading