author     Philip Reames <listmail@philipreames.com>   2019-11-04 09:40:53 -0800
committer  Philip Reames <listmail@philipreames.com>   2019-11-04 11:03:28 -0800
commit     6ff439b57f0fc2b1a2193ae37637c531ff652b1c (patch)
tree       ccb0d8f0b5e64894d5eca7859b34876fe78cb298
parent     113181e9bd05353ed562ee7b971bf7f1e58cd5de (diff)
download   bcm5719-llvm-6ff439b57f0fc2b1a2193ae37637c531ff652b1c.tar.gz
           bcm5719-llvm-6ff439b57f0fc2b1a2193ae37637c531ff652b1c.zip
[SimplifyCFG] Use a (trivially) dominating widenable branch to remove later slow path blocks
This transformation is a variation on the GuardWidening transformation we have checked in as its own pass. Instead of focusing on merging (i.e. hoisting and simplifying) two widenable branches, this transform makes the observation that simply removing a second slow-path block (by reusing an existing one) is often a very useful canonicalization. This may lead to later merging, or it may not. This is a useful generalization when the intermediate block has loads whose dereferenceability is hard to establish.

As noted in the patch, this can be generalized further, and will be.

Differential Revision: https://reviews.llvm.org/D69689
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp           48
-rw-r--r--  llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll  316
2 files changed, 364 insertions, 0 deletions
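Concretely, the canonicalization has the following before/after shape (a sketch with illustrative value names, mirroring the @basic test added below rather than exact pass output):

; Before: %guarded exits to its own slow-path block %deopt2, which ends in
; a call to @llvm.experimental.deoptimize.
entry:
  %wc = call i1 @llvm.experimental.widenable.condition()
  %guard_cond = and i1 %cond_0, %wc
  br i1 %guard_cond, label %guarded, label %deopt

guarded:
  %v = load i32, i32* %p
  %cond_1 = icmp eq i32 %v, 0
  br i1 %cond_1, label %return, label %deopt2

; After: the failing edge of %guarded is redirected to the dominating
; widenable branch's %deopt block, leaving %deopt2 unreachable. This is
; legal because a widenable condition may be strengthened to fail more
; often, and %guarded performs no side effects before its branch.
guarded:
  %v = load i32, i32* %p
  %cond_1 = icmp eq i32 %v, 0
  br i1 %cond_1, label %return, label %deopt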
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3a5e3293ed4..9644ba31f4d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -3212,6 +3213,47 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
return Changed;
}
+
+/// If the previous block ended with a widenable branch, determine if reusing
+/// the target block is profitable and legal. This will have the effect of
+/// "widening" PBI, but doesn't require us to reason about hoisting safety.
+static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
+  // TODO: This can be generalized in two important ways:
+  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
+  //    values from the PBI edge.
+  // 2) We can sink side effecting instructions into BI's fallthrough
+  //    successor provided they don't contribute to the computation of
+  //    BI's condition.
+  Value *CondWB, *WC;
+  BasicBlock *IfTrueBB, *IfFalseBB;
+  if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
+      IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
+    return false;
+  if (!IfFalseBB->phis().empty())
+    return false; // TODO
+  // Use lambda to lazily compute expensive condition after cheap ones.
+  auto NoSideEffects = [](BasicBlock &BB) {
+    return !llvm::any_of(BB, [](const Instruction &I) {
+      return I.mayWriteToMemory() || I.mayHaveSideEffects();
+    });
+  };
+  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
+      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
+      NoSideEffects(*BI->getParent())) {
+    BI->getSuccessor(1)->removePredecessor(BI->getParent());
+    BI->setSuccessor(1, IfFalseBB);
+    return true;
+  }
+  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
+      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
+      NoSideEffects(*BI->getParent())) {
+    BI->getSuccessor(0)->removePredecessor(BI->getParent());
+    BI->setSuccessor(0, IfFalseBB);
+    return true;
+  }
+  return false;
+}
+
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
@@ -3267,6 +3309,12 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
}
}
+  // If the previous block ended with a widenable branch, determine if reusing
+  // the target block is profitable and legal. This will have the effect of
+  // "widening" PBI, but doesn't require us to reason about hoisting safety.
+  if (tryWidenCondBranchToCondBranch(PBI, BI))
+    return true;
+
if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
if (CE->canTrap())
return false;
diff --git a/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll
new file mode 100644
index 00000000000..d6be9cd53a1
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/wc-widen-block.ll
@@ -0,0 +1,316 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=simplify-cfg -S < %s | FileCheck %s
+
+define i32 @basic(i1 %cond_0, i32* %p) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ %v = load i32, i32* %p
+ %cond_1 = icmp eq i32 %v, 0
+ br i1 %cond_1, label %return, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+define i32 @mergeable(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @mergeable(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND_NOT:%.*]] = xor i1 [[EXIPLICIT_GUARD_COND]], true
+; CHECK-NEXT: [[COND_1_NOT:%.*]] = xor i1 [[COND_1:%.*]], true
+; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[EXIPLICIT_GUARD_COND_NOT]], [[COND_1_NOT]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label [[DEOPT:%.*]], label [[RETURN:%.*]], !prof !1
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ br i1 %cond_1, label %return, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+define i32 @basic_swapped_branch(i1 %cond_0, i32* %p) {
+; CHECK-LABEL: @basic_swapped_branch(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[P:%.*]]
+; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[COND_1]], label [[DEOPT]], label [[RETURN:%.*]], !prof !0
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ %v = load i32, i32* %p
+ %cond_1 = icmp eq i32 %v, 0
+ br i1 %cond_1, label %deopt2, label %return, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+define i32 @todo_sink_side_effect(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @todo_sink_side_effect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[RETURN:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK: deopt2:
+; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET2]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ call void @unknown()
+ br i1 %cond_1, label %return, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+define i32 @neg_unsinkable_side_effect(i1 %cond_0) {
+; CHECK-LABEL: @neg_unsinkable_side_effect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: [[V:%.*]] = call i32 @unknown_i32()
+; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i32 [[V]], 0
+; CHECK-NEXT: br i1 [[COND_1]], label [[RETURN:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK: deopt2:
+; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET2]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ %v = call i32 @unknown_i32()
+ %cond_1 = icmp eq i32 %v, 0
+ br i1 %cond_1, label %return, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+
+define i32 @neg_inf_loop(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @neg_inf_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[RETURN:%.*]], label [[DEOPT]], !prof !0
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ call void @unknown()
+ br i1 %cond_1, label %return, label %deopt, !prof !0
+
+return:
+ ret i32 0
+}
+
+
+define i32 @todo_phi(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @todo_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 [[PHI]]) ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[RETURN:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK: deopt2:
+; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET2]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %phi = phi i32 [0, %entry]
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 %phi) ]
+ ret i32 %deoptret
+
+guarded:
+ br i1 %cond_1, label %return, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+
+return:
+ ret i32 0
+}
+
+
+define i32 @neg_loop(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @neg_loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[GUARDED:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition()
+; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]]
+; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET]]
+; CHECK: guarded:
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: br i1 [[COND_1:%.*]], label [[LOOP:%.*]], label [[DEOPT2:%.*]], !prof !0
+; CHECK: deopt2:
+; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+; CHECK-NEXT: ret i32 [[DEOPTRET2]]
+;
+entry:
+ br label %guarded
+
+loop:
+ %widenable_cond = call i1 @llvm.experimental.widenable.condition()
+ %exiplicit_guard_cond = and i1 %cond_0, %widenable_cond
+ br i1 %exiplicit_guard_cond, label %guarded, label %deopt, !prof !0
+
+deopt:
+ %deoptret = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret
+
+guarded:
+ call void @unknown()
+ br i1 %cond_1, label %loop, label %deopt2, !prof !0
+
+deopt2:
+ %deoptret2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ]
+ ret i32 %deoptret2
+}
+
+
+declare void @unknown()
+declare i32 @unknown_i32()
+
+declare i1 @llvm.experimental.widenable.condition()
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+!0 = !{!"branch_weights", i32 1048576, i32 1}
+!1 = !{i32 1, i32 -2147483648}