Skip to content

Commit

Permalink
[LV,LAA] Don't vectorize loops with load and store to invar address.
Browse files Browse the repository at this point in the history
Code checking stores to invariant addresses and reductions made an
incorrect assumption that the case of both a load & store to the same
invariant address does not need to be handled.

In some cases when vectorizing with runtime checks, there may be
dependences with a load and store to the same address, storing a
reduction value.

Update LAA to separately track if there was a store-store and a
load-store dependence with an invariant address.

Bail out early if there was a load-store dependence with an invariant
address. If there was a store-store one, still apply the logic checking
if they all store a reduction.

(cherry picked from commit b54a78d)
  • Loading branch information
fhahn authored and AreaZR committed May 9, 2024
1 parent d9a7e51 commit 0c830c3
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 16 deletions.
28 changes: 21 additions & 7 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,7 +579,11 @@ class LoopAccessInfo {
AAResults *AA, DominatorTree *DT, LoopInfo *LI);

/// Return true if we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles.
/// no memory dependence cycles. Note that for dependences between loads &
/// stores with uniform addresses,
/// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
/// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
/// checked.
bool canVectorizeMemory() const { return CanVecMem; }

/// Return true if there is a convergent operation in the loop. There may
Expand Down Expand Up @@ -632,10 +636,16 @@ class LoopAccessInfo {
/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;

/// If the loop has memory dependence involving an invariant address, i.e. two
/// stores or a store and a load, then return true, else return false.
bool hasDependenceInvolvingLoopInvariantAddress() const {
return HasDependenceInvolvingLoopInvariantAddress;
/// Return true if the loop has memory dependence involving two stores to an
/// invariant address, else return false.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const {
return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
}

/// Return true if the loop has memory dependence involving a load and a store
/// to an invariant address, else return false.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const {
return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
}

/// Return the list of stores to invariant addresses.
Expand Down Expand Up @@ -697,8 +707,12 @@ class LoopAccessInfo {
bool CanVecMem = false;
bool HasConvergentOp = false;

/// Indicator that there are non vectorizable stores to a uniform address.
bool HasDependenceInvolvingLoopInvariantAddress = false;
/// Indicator that there are two non vectorizable stores to the same uniform
/// address.
bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
/// Indicator that there is non vectorizable load and store to the same
/// uniform address.
bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;

/// List of stores to invariant addresses.
SmallVector<StoreInst *> StoresToInvariantAddresses;
Expand Down
14 changes: 9 additions & 5 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2465,7 +2465,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (isInvariant(Ptr)) {
// Record store instructions to loop invariant addresses
StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
!UniformStores.insert(Ptr).second;
}

Expand Down Expand Up @@ -2521,7 +2521,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
if (UniformStores.count(Ptr)) {
LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
"load and uniform store to the same address!\n");
HasDependenceInvolvingLoopInvariantAddress = true;
HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
}

MemoryLocation Loc = MemoryLocation::get(LD);
Expand Down Expand Up @@ -2985,9 +2985,13 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PtrRtChecking->print(OS, Depth);
OS << "\n";

OS.indent(Depth) << "Non vectorizable stores to invariant address were "
<< (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
<< "found in loop.\n";
OS.indent(Depth)
<< "Non vectorizable stores to invariant address were "
<< (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
HasLoadStoreDependenceInvolvingLoopInvariantAddress
? ""
: "not ")
<< "found in loop.\n";

OS.indent(Depth) << "SCEV assumptions:\n";
PSE->getPredicate().print(OS, Depth);
Expand Down
16 changes: 12 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,15 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
if (!LAI->canVectorizeMemory())
return false;

if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("We don't allow storing to uniform addresses",
"write to a loop invariant address could not "
"be vectorized",
"CantVectorizeStoreToLoopInvariantAddress", ORE,
TheLoop);
return false;
}

// We can vectorize stores to invariant address when final reduction value is
// guaranteed to be stored at the end of the loop. Also, if decision to
// vectorize loop is made, runtime checks are added so as to make sure that
Expand Down Expand Up @@ -1102,13 +1111,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
}
}

if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
// For each invariant address, check its last stored value is the result
// of one of our reductions.
//
// We do not check if dependence with loads exists because they are
// currently rejected earlier in LoopAccessInfo::analyzeLoop. In case this
// behaviour changes we have to modify this code.
// We do not check if dependence with loads exists because that is already
// checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
ScalarEvolution *SE = PSE.getSE();
SmallVector<StoreInst *, 4> UnhandledStores;
for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,40 @@ exit:
ret void
}

; Check that if we have a read from an invariant address, we do not vectorize,
; even if we vectorize with runtime checks. The test below is a variant of
; @reduc_store_load with a non-constant dependence distance, resulting in
; vectorization with runtime checks.
;
; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence
; CHECK-NOT: vector.body:
define void @reduc_store_load_with_non_constant_distance_dependence(ptr %dst, ptr noalias %dst.2, i64 %off) {
entry:
; %gep.dst is computed outside the loop, so it is a loop-invariant address.
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 42
; Offset %dst.2 by the runtime value %off: the distance between accesses
; through %dst.2 and %dst.2.off is not a compile-time constant, which is what
; forces vectorization to rely on runtime checks.
%dst.2.off = getelementptr inbounds i32, ptr %dst.2, i64 %off
store i32 0, ptr %gep.dst, align 4
br label %for.body

for.body:
; %sum accumulates the reduction; %iv is the canonical induction variable.
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
%gep.src = getelementptr inbounds i32, ptr %dst.2, i64 %iv
%0 = load i32, ptr %gep.src, align 4
; NOTE(review): %iv.off has no uses below — confirm whether intentional.
%iv.off = mul i64 %iv, 2
%add = add nsw i32 %sum, %0
; Load from the invariant address %gep.dst before storing the reduction value
; to it: this load-store dependence on an invariant address is what the test
; expects to block vectorization (CHECK-NOT: vector.body).
%lv = load i32, ptr %gep.dst
store i32 %add, ptr %gep.dst, align 4
%gep.src.2 = getelementptr inbounds i32, ptr %dst.2.off, i64 %iv
store i32 %lv, ptr %gep.src.2, align 4
%iv.next = add nuw nsw i64 %iv, 1
; Fixed trip count of 1000 iterations.
%exitcond = icmp eq i64 %iv.next, 1000
br i1 %exitcond, label %exit, label %for.body

exit:
ret void
}


; Final value is not guaranteed to be stored in an invariant address.
; We don't vectorize in that case.
;
Expand Down

0 comments on commit 0c830c3

Please sign in to comment.