-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Simplify selection of llvm.amdgcn.inverse.ballot. NFCI. #99345
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-amdgpu Author: Jay Foad (jayfoad) Changes: Full diff: https://github.com/llvm/llvm-project/pull/99345.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 6d5ffc66d98b2..b7471bab12850 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2775,18 +2775,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
return;
- case Intrinsic::amdgcn_inverse_ballot:
- switch (N->getOperand(1).getValueSizeInBits()) {
- case 32:
- Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
- break;
- case 64:
- Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
- break;
- default:
- llvm_unreachable("Unsupported size for inverse ballot mask.");
- }
- break;
default:
SelectCode(N);
break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index dcb0f47973c4a..da3e8c0a62b08 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1055,8 +1055,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectIntrinsicCmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
- case Intrinsic::amdgcn_inverse_ballot:
- return selectInverseBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::amdgcn_groupstaticsize:
@@ -1449,17 +1447,6 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
- MachineBasicBlock *BB = I.getParent();
- const DebugLoc &DL = I.getDebugLoc();
- const Register DstReg = I.getOperand(0).getReg();
- const Register MaskReg = I.getOperand(2).getReg();
-
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(MaskReg);
- I.eraseFromParent();
- return true;
-}
-
bool AMDGPUInstructionSelector::selectRelocConstant(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 2d3317e04ce12..43ed210508d33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -112,7 +112,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicCmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
- bool selectInverseBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
bool selectGroupStaticSize(MachineInstr &I) const;
bool selectReturnAddress(MachineInstr &I) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index df5a334f83082..4d81048ce54db 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5479,24 +5479,11 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
case AMDGPU::S_INVERSE_BALLOT_U32:
- case AMDGPU::S_INVERSE_BALLOT_U64: {
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const DebugLoc &DL = MI.getDebugLoc();
- const Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
-
- const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);
-
- if (IsVALU) {
- MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
- }
-
- BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
- MI.eraseFromParent();
+ case AMDGPU::S_INVERSE_BALLOT_U64:
+ // These opcodes only exist to let SIFixSGPRCopies insert a readfirstlane if
+ // necessary. After that they are equivalent to a COPY.
+ MI.setDesc(TII->get(AMDGPU::COPY));
return BB;
- }
case AMDGPU::ENDPGM_TRAP: {
const DebugLoc &DL = MI.getDebugLoc();
if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6d12e8c6f2de2..57c6597f124f4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6687,7 +6687,9 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
MI.getOpcode() == AMDGPU::S_QUADMASK_B32 ||
MI.getOpcode() == AMDGPU::S_QUADMASK_B64 ||
MI.getOpcode() == AMDGPU::S_WQM_B32 ||
- MI.getOpcode() == AMDGPU::S_WQM_B64) {
+ MI.getOpcode() == AMDGPU::S_WQM_B64 ||
+ MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U32 ||
+ MI.getOpcode() == AMDGPU::S_INVERSE_BALLOT_U64) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isReg() && RI.hasVectorRegisters(MRI.getRegClass(Src.getReg())))
Src.setReg(readlaneVGPRToSGPR(Src.getReg(), MI, MRI));
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 77b17a0f2789b..f2721fbd164bf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -212,9 +212,15 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
}
let usesCustomInserter = 1 in {
-def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
+def S_INVERSE_BALLOT_U32 : SPseudoInstSI<
+ (outs SReg_32:$sdst), (ins SSrc_b32:$mask),
+ [(set i1:$sdst, (int_amdgcn_inverse_ballot i32:$mask))]
+>;
-def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
+def S_INVERSE_BALLOT_U64 : SPseudoInstSI<
+ (outs SReg_64:$sdst), (ins SSrc_b64:$mask),
+ [(set i1:$sdst, (int_amdgcn_inverse_ballot i64:$mask))]
+>;
} // End usesCustomInserter = 1
// Pseudo instructions used for @llvm.fptrunc.round upward
|
arsenm
approved these changes
Jul 18, 2024
Harini0924
pushed a commit
to Harini0924/llvm-project
that referenced
this pull request
Jul 22, 2024
sgundapa
pushed a commit
to sgundapa/upstream_effort
that referenced
this pull request
Jul 23, 2024
yuxuanchen1997
pushed a commit
that referenced
this pull request
Jul 25, 2024
) Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251064
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.