Skip to content

Commit

Permalink
[GISel] Combine (Scalarize) vector load followed by an element extract.
Browse files Browse the repository at this point in the history
  • Loading branch information
prtaneja authored Dec 6, 2023
1 parent 06c5c27 commit 41507fe
Show file tree
Hide file tree
Showing 10 changed files with 2,425 additions and 2,740 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,10 @@ class CombinerHelper {
/// Match (and (load x), mask) -> zextload x
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed
/// load.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo);

bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo);

Expand Down
10 changes: 8 additions & 2 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,12 @@ def sext_inreg_to_zext_inreg : GICombineRule<
}])
>;

// Scalarize a vector load whose only user is a G_EXTRACT_VECTOR_ELT:
// matchCombineExtractedVectorLoad narrows the load to a single element and
// applyBuildFn emits the replacement instructions it queued up.
def combine_extracted_vector_load : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root,
[{ return Helper.matchCombineExtractedVectorLoad(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def combine_indexed_load_store : GICombineRule<
(defs root:$root, indexed_load_store_matchdata:$matchinfo),
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
Expand Down Expand Up @@ -1291,8 +1297,8 @@ def constant_fold_binops : GICombineGroup<[constant_fold_binop,
constant_fold_fp_binop]>;

def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
undef_combines, identity_combines, phi_combines,
extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load,
undef_combines, identity_combines, phi_combines,
simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
Expand Down
95 changes: 95 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,101 @@ bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
return RealUse;
}

/// Match a G_EXTRACT_VECTOR_ELT whose vector operand is defined by a simple
/// (non-atomic, non-volatile) G_LOAD with no other users, and replace the
/// pair with a scalar load of just the extracted element.
///
/// \returns true and fills \p MatchInfo with a builder closure when the
/// narrowed load is legal and allowed/fast on the target.
bool CombinerHelper::matchCombineExtractedVectorLoad(MachineInstr &MI,
                                                     BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);

  // Check if there is a load that defines the vector being extracted from.
  auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
  if (!LoadMI)
    return false;

  Register Vector = MI.getOperand(1).getReg();
  LLT VecEltTy = MRI.getType(Vector).getElementType();
  LLT ResultTy = MRI.getType(MI.getOperand(0).getReg());

  // Bail out rather than assert: before legalization the extract's result
  // type is not guaranteed to match the vector element type, and that is
  // valid generic MIR — just not a case this combine handles.
  if (ResultTy != VecEltTy)
    return false;

  // Only scalarize when this extract is the sole (non-debug) user of the
  // vector; otherwise the full vector load is still required.
  if (!MRI.hasOneNonDBGUse(Vector))
    return false;

  // Check if the defining load is simple (not atomic or volatile).
  if (!LoadMI->isSimple())
    return false;

  // If the vector element type is not a multiple of a byte then we are unable
  // to correctly compute an address to load only the extracted element as a
  // scalar.
  if (!VecEltTy.isByteSized())
    return false;

  Register Result = MI.getOperand(0).getReg();
  Register Index = MI.getOperand(2).getReg();

  // Check if the new load that we are going to create is legal
  // if we are in the post-legalization phase.
  const MachineMemOperand &MMO = LoadMI->getMMO();
  Align Alignment = MMO.getAlign();
  MachinePointerInfo PtrInfo;
  uint64_t Offset;

  // Finding the appropriate PtrInfo if the *index* is a known constant.
  // (Querying the vector register here would never succeed — it is defined
  // by the load, not by a G_CONSTANT.) The pointer info is required to
  // create the memory operand for the narrowed load, which in turn lets us
  // query legality before committing to the combine.
  if (auto CVal = getIConstantVRegVal(Index, MRI)) {
    int Elt = CVal->getZExtValue();
    // FIXME: should be (ABI size)*Elt.
    Offset = VecEltTy.getSizeInBits() * Elt / 8;
    PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
  } else {
    // Discard the pointer info except the address space because the memory
    // operand can't represent this new access since the offset is variable.
    Offset = VecEltTy.getSizeInBits() / 8;
    PtrInfo = MachinePointerInfo(MMO.getPointerInfo().getAddrSpace());
  }

  Alignment = commonAlignment(Alignment, Offset);

  Register VecPtr = LoadMI->getPointerReg();
  LLT PtrTy = MRI.getType(VecPtr);

  MachineFunction &MF = *MI.getMF();
  auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);

  LegalityQuery::MemDesc MMDesc(*NewMMO);

  LegalityQuery Q = {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}};

  if (!isLegalOrBeforeLegalizer(Q))
    return false;

  // Load must be allowed and fast on the target.
  LLVMContext &C = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  unsigned Fast = 0;
  if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
                                              &Fast) ||
      !Fast)
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    GISelObserverWrapper DummyObserver;
    LegalizerHelper Helper(B.getMF(), DummyObserver, B);
    // Compute the address of the requested element.
    Register FinalPtr = Helper.getVectorElementPointer(
        LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
        Index);
    // Emit the narrowed scalar G_LOAD.
    B.buildLoad(Result, FinalPtr, PtrInfo, Alignment);
    // Remove the original G_LOAD instruction.
    LoadMI->eraseFromParent();
  };

  return true;
}

bool CombinerHelper::matchCombineIndexedLoadStore(
MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
auto &LdSt = cast<GLoadStore>(MI);
Expand Down
58 changes: 13 additions & 45 deletions llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14659,17 +14659,9 @@ define i8 @load_single_extract_variable_index_i8(ptr %A, i32 %idx) {
;
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i8:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w1
; CHECK-GISEL-NEXT: ldr q0, [x0]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and x9, x9, #0xf
; CHECK-GISEL-NEXT: lsl x10, x9, #1
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: sub x9, x10, x9
; CHECK-GISEL-NEXT: ldrb w0, [x8, x9]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: mov w8, w1
; CHECK-GISEL-NEXT: and x8, x8, #0xf
; CHECK-GISEL-NEXT: ldrb w0, [x0, x8]
; CHECK-GISEL-NEXT: ret
%lv = load <16 x i8>, ptr %A
%e = extractelement <16 x i8> %lv, i32 %idx
Expand All @@ -14692,15 +14684,9 @@ define i16 @load_single_extract_variable_index_i16(ptr %A, i32 %idx) {
;
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: ldr q0, [x0]
; CHECK-GISEL-NEXT: mov w9, w1
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and x9, x9, #0x7
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: ldrh w0, [x8, x9, lsl #1]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: mov w8, w1
; CHECK-GISEL-NEXT: and x8, x8, #0x7
; CHECK-GISEL-NEXT: ldrh w0, [x0, x8, lsl #1]
; CHECK-GISEL-NEXT: ret
%lv = load <8 x i16>, ptr %A
%e = extractelement <8 x i16> %lv, i32 %idx
Expand All @@ -14717,15 +14703,9 @@ define i32 @load_single_extract_variable_index_i32(ptr %A, i32 %idx) {
;
; CHECK-GISEL-LABEL: load_single_extract_variable_index_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: ldr q0, [x0]
; CHECK-GISEL-NEXT: mov w9, w1
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and x9, x9, #0x3
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: ldr w0, [x8, x9, lsl #2]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: mov w8, w1
; CHECK-GISEL-NEXT: and x8, x8, #0x3
; CHECK-GISEL-NEXT: ldr w0, [x0, x8, lsl #2]
; CHECK-GISEL-NEXT: ret
%lv = load <4 x i32>, ptr %A
%e = extractelement <4 x i32> %lv, i32 %idx
Expand Down Expand Up @@ -14779,14 +14759,8 @@ define i32 @load_single_extract_variable_index_masked_i32(ptr %A, i32 %idx) {
;
; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: ldr q0, [x0]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and w9, w1, #0x3
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: ldr w0, [x8, w9, uxtw #2]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: and w8, w1, #0x3
; CHECK-GISEL-NEXT: ldr w0, [x0, w8, uxtw #2]
; CHECK-GISEL-NEXT: ret
%idx.x = and i32 %idx, 3
%lv = load <4 x i32>, ptr %A
Expand All @@ -14803,14 +14777,8 @@ define i32 @load_single_extract_variable_index_masked2_i32(ptr %A, i32 %idx) {
;
; CHECK-GISEL-LABEL: load_single_extract_variable_index_masked2_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: ldr q0, [x0]
; CHECK-GISEL-NEXT: mov x8, sp
; CHECK-GISEL-NEXT: and w9, w1, #0x1
; CHECK-GISEL-NEXT: str q0, [sp]
; CHECK-GISEL-NEXT: ldr w0, [x8, w9, uxtw #2]
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: and w8, w1, #0x1
; CHECK-GISEL-NEXT: ldr w0, [x0, w8, uxtw #2]
; CHECK-GISEL-NEXT: ret
%idx.x = and i32 %idx, 1
%lv = load <4 x i32>, ptr %A
Expand Down
Loading

0 comments on commit 41507fe

Please sign in to comment.