summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Analysis/SyncDependenceAnalysis.cpp61
-rw-r--r--llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll111
2 files changed, 137 insertions, 35 deletions
diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
index 3cf248a3114..8447dc87069 100644
--- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
@@ -218,9 +218,11 @@ struct DivergencePropagator {
template <typename SuccessorIterable>
std::unique_ptr<ConstBlockSet>
computeJoinPoints(const BasicBlock &RootBlock,
- SuccessorIterable NodeSuccessors, const Loop *ParentLoop, const BasicBlock * PdBoundBlock) {
+ SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
assert(JoinBlocks);
+ LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints. Parent loop: " << (ParentLoop ? ParentLoop->getName() : "<null>") << "\n" );
+
// bootstrap with branch targets
for (const auto *SuccBlock : NodeSuccessors) {
DefMap.emplace(SuccBlock, SuccBlock);
@@ -228,13 +230,19 @@ struct DivergencePropagator {
if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
// immediate loop exit from node.
ReachedLoopExits.insert(SuccBlock);
- continue;
} else {
// regular successor
PendingUpdates.insert(SuccBlock);
}
}
+ LLVM_DEBUG(
+ dbgs() << "SDA: rpo order:\n";
+ for (const auto * RpoBlock : FuncRPOT) {
+ dbgs() << "- " << RpoBlock->getName() << "\n";
+ }
+ );
+
auto ItBeginRPO = FuncRPOT.begin();
// skip until term (TODO RPOT won't let us start at @term directly)
@@ -245,16 +253,18 @@ struct DivergencePropagator {
// propagate definitions at the immediate successors of the node in RPO
auto ItBlockRPO = ItBeginRPO;
- while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
+ while ((++ItBlockRPO != ItEndRPO) &&
+ !PendingUpdates.empty()) {
const auto *Block = *ItBlockRPO;
+ LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
- // skip @block if not pending update
+ // skip Block if not pending update
auto ItPending = PendingUpdates.find(Block);
if (ItPending == PendingUpdates.end())
continue;
PendingUpdates.erase(ItPending);
- // propagate definition at @block to its successors
+ // propagate definition at Block to its successors
auto ItDef = DefMap.find(Block);
const auto *DefBlock = ItDef->second;
assert(DefBlock);
@@ -278,6 +288,8 @@ struct DivergencePropagator {
}
}
+ LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
+
// We need to know the definition at the parent loop header to decide
// whether the definition at the header is different from the definition at
// the loop exits, which would indicate a divergent loop exits.
@@ -292,24 +304,17 @@ struct DivergencePropagator {
// |
// proper exit from both loops
//
- // D post-dominates B as it is the only proper exit from the "A loop".
- // If C has a divergent branch, propagation will therefore stop at D.
- // That implies that B will never receive a definition.
- // But that definition can only be the same as at D (D itself in thise case)
- // because all paths to anywhere have to pass through D.
- //
- const BasicBlock *ParentLoopHeader =
- ParentLoop ? ParentLoop->getHeader() : nullptr;
- if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
- DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
- }
-
// analyze reached loop exits
if (!ReachedLoopExits.empty()) {
+ const BasicBlock *ParentLoopHeader =
+ ParentLoop ? ParentLoop->getHeader() : nullptr;
+
assert(ParentLoop);
- const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
+ auto ItHeaderDef = DefMap.find(ParentLoopHeader);
+ const auto *HeaderDefBlock = (ItHeaderDef == DefMap.end()) ? nullptr : ItHeaderDef->second;
+
LLVM_DEBUG(printDefs(dbgs()));
- assert(HeaderDefBlock && "no definition in header of carrying loop");
+ assert(HeaderDefBlock && "no definition at header of carrying loop");
for (const auto *ExitBlock : ReachedLoopExits) {
auto ItExitDef = DefMap.find(ExitBlock);
@@ -339,19 +344,10 @@ const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
return *ItCached->second;
}
- // dont propagte beyond the immediate post dom of the loop
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Loop.getHeader()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- while (PdBoundBlock && Loop.contains(PdBoundBlock)) {
- IpdNode = IpdNode->getIDom();
- PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
- }
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
- *Loop.getHeader(), LoopExits, Loop.getParentLoop(), PdBoundBlock);
+ *Loop.getHeader(), LoopExits, Loop.getParentLoop());
auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
assert(ItInserted.second);
@@ -370,16 +366,11 @@ SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
if (ItCached != CachedBranchJoins.end())
return *ItCached->second;
- // dont propagate beyond the immediate post dominator of the branch
- const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(Term.getParent()));
- const auto *IpdNode = PdNode->getIDom();
- const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
-
// compute all join points
DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
const auto &TermBlock = *Term.getParent();
auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
- TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock), PdBoundBlock);
+ TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
assert(ItInserted.second);
diff --git a/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
new file mode 100644
index 00000000000..cb3e42de363
--- /dev/null
+++ b/llvm/test/Analysis/DivergenceAnalysis/AMDGPU/b42473-r1-crash.ll
@@ -0,0 +1,111 @@
+; RUN: opt -mtriple amdgcn-unknown-amdhsa -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+declare i32 @gf2(i32)
+declare i32 @gf1(i32)
+
+define void @tw1(i32 addrspace(4)* noalias nocapture readonly %A, i32 addrspace(4)* noalias nocapture %B) local_unnamed_addr #2 {
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'tw1':
+; CHECK: DIVERGENT: i32 addrspace(4)* %A
+; CHECK: DIVERGENT: i32 addrspace(4)* %B
+entry:
+; CHECK: DIVERGENT: %call = tail call i32 @gf2(i32 0) #0
+; CHECK: DIVERGENT: %cmp = icmp ult i32 %call, 16
+; CHECK: DIVERGENT: br i1 %cmp, label %if.then, label %new_exit
+ %call = tail call i32 @gf2(i32 0) #3
+ %cmp = icmp ult i32 %call, 16
+ br i1 %cmp, label %if.then, label %new_exit
+
+if.then:
+; CHECK: DIVERGENT: %call1 = tail call i32 @gf1(i32 0) #0
+; CHECK: DIVERGENT: %arrayidx = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %call1
+; CHECK: DIVERGENT: %0 = load i32, i32 addrspace(4)* %arrayidx, align 4
+; CHECK: DIVERGENT: %cmp225 = icmp sgt i32 %0, 0
+; CHECK: DIVERGENT: %arrayidx10 = getelementptr inbounds i32, i32 addrspace(4)* %B, i32 %call1
+; CHECK: DIVERGENT: br i1 %cmp225, label %while.body.preheader, label %if.then.while.end_crit_edge
+ %call1 = tail call i32 @gf1(i32 0) #4
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %call1
+ %0 = load i32, i32 addrspace(4)* %arrayidx, align 4
+ %cmp225 = icmp sgt i32 %0, 0
+ %arrayidx10 = getelementptr inbounds i32, i32 addrspace(4)* %B, i32 %call1
+ br i1 %cmp225, label %while.body.preheader, label %if.then.while.end_crit_edge
+
+while.body.preheader:
+ br label %while.body
+
+if.then.while.end_crit_edge:
+; CHECK: DIVERGENT: %.pre = load i32, i32 addrspace(4)* %arrayidx10, align 4
+ %.pre = load i32, i32 addrspace(4)* %arrayidx10, align 4
+ br label %while.end
+
+while.body:
+; CHECK-NOT: DIVERGENT: %i.026 = phi i32 [ %inc, %if.end.while.body_crit_edge ], [ 0, %while.body.preheader ]
+; CHECK: DIVERGENT: %call3 = tail call i32 @gf1(i32 0) #0
+; CHECK: DIVERGENT: %cmp4 = icmp ult i32 %call3, 10
+; CHECK: DIVERGENT: %arrayidx6 = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %i.026
+; CHECK: DIVERGENT: %1 = load i32, i32 addrspace(4)* %arrayidx6, align 4
+; CHECK: DIVERGENT: br i1 %cmp4, label %if.then5, label %if.else
+ %i.026 = phi i32 [ %inc, %if.end.while.body_crit_edge ], [ 0, %while.body.preheader ]
+ %call3 = tail call i32 @gf1(i32 0) #4
+ %cmp4 = icmp ult i32 %call3, 10
+ %arrayidx6 = getelementptr inbounds i32, i32 addrspace(4)* %A, i32 %i.026
+ %1 = load i32, i32 addrspace(4)* %arrayidx6, align 4
+ br i1 %cmp4, label %if.then5, label %if.else
+
+if.then5:
+; CHECK: DIVERGENT: %mul = shl i32 %1, 1
+; CHECK: DIVERGENT: %2 = load i32, i32 addrspace(4)* %arrayidx10, align 4
+; CHECK: DIVERGENT: %add = add nsw i32 %2, %mul
+ %mul = shl i32 %1, 1
+ %2 = load i32, i32 addrspace(4)* %arrayidx10, align 4
+ %add = add nsw i32 %2, %mul
+ br label %if.end
+
+if.else:
+; CHECK: DIVERGENT: %mul9 = shl i32 %1, 2
+; CHECK: DIVERGENT: %3 = load i32, i32 addrspace(4)* %arrayidx10, align 4
+; CHECK: DIVERGENT: %add11 = add nsw i32 %3, %mul9
+ %mul9 = shl i32 %1, 2
+ %3 = load i32, i32 addrspace(4)* %arrayidx10, align 4
+ %add11 = add nsw i32 %3, %mul9
+ br label %if.end
+
+if.end:
+; CHECK: DIVERGENT: %storemerge = phi i32 [ %add11, %if.else ], [ %add, %if.then5 ]
+; CHECK: DIVERGENT: store i32 %storemerge, i32 addrspace(4)* %arrayidx10, align 4
+; CHECK-NOT: DIVERGENT: %inc = add nuw nsw i32 %i.026, 1
+; CHECK: DIVERGENT: %exitcond = icmp ne i32 %inc, %0
+; CHECK: DIVERGENT: br i1 %exitcond, label %if.end.while.body_crit_edge, label %while.end.loopexit
+ %storemerge = phi i32 [ %add11, %if.else ], [ %add, %if.then5 ]
+ store i32 %storemerge, i32 addrspace(4)* %arrayidx10, align 4
+ %inc = add nuw nsw i32 %i.026, 1
+ %exitcond = icmp ne i32 %inc, %0
+ br i1 %exitcond, label %if.end.while.body_crit_edge, label %while.end.loopexit
+
+if.end.while.body_crit_edge:
+ br label %while.body
+
+while.end.loopexit:
+; CHECK: DIVERGENT: %storemerge.lcssa = phi i32 [ %storemerge, %if.end ]
+ %storemerge.lcssa = phi i32 [ %storemerge, %if.end ]
+ br label %while.end
+
+while.end:
+; CHECK: DIVERGENT: %4 = phi i32 [ %.pre, %if.then.while.end_crit_edge ], [ %storemerge.lcssa, %while.end.loopexit ]
+; CHECK: DIVERGENT: %i.0.lcssa = phi i32 [ 0, %if.then.while.end_crit_edge ], [ %0, %while.end.loopexit ]
+; CHECK: DIVERGENT: %sub = sub nsw i32 %4, %i.0.lcssa
+; CHECK: DIVERGENT: store i32 %sub, i32 addrspace(4)* %arrayidx10, align 4
+ %4 = phi i32 [ %.pre, %if.then.while.end_crit_edge ], [ %storemerge.lcssa, %while.end.loopexit ]
+ %i.0.lcssa = phi i32 [ 0, %if.then.while.end_crit_edge ], [ %0, %while.end.loopexit ]
+ %sub = sub nsw i32 %4, %i.0.lcssa
+ store i32 %sub, i32 addrspace(4)* %arrayidx10, align 4
+ br label %new_exit
+
+new_exit:
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind readnone }
OpenPOWER on IntegriCloud