Fix a bug w/inbounds invalidation in LFTR (recommit)

Recommit r363289 with a bug fix for crash identified in pr42279. Issue was that a loop exit test does not have to be an icmp, leading to a null dereference crash when new logic was exercised for that case. Test case previously committed in r363601. Original commit comment follows: This contains fixes for two cases where we might invalidate inbounds and leave it stale in the IR (a miscompile). Case 1 is when switching to an IV with no dynamically live uses, and case 2 is when doing pre-to-post conversion on the same pointer type IV. The basic scheme used is to prove that using the given IV (pre or post increment forms) would have to already trigger UB on the path to the test we're modifying. As such, our potential UB triggering use does not change the semantics of the original program. As was pointed out in the review thread by Nikita, this is defending against a separate issue from the hasConcreteDef case. This is about poison, that's about undef. Unfortunately, the two are different, see Nikita's comment for a fuller explanation, he explains it well. (Note: I'm going to address Nikita's last style comment in a separate commit just to minimize chance of subtle bugs being introduced due to typos.) Differential Revision: https://reviews.llvm.org/D62939 llvm-svn: 363613
author: Philip Reames <listmail@philipreames.com> 2019-06-17 20:32:22 +0000
committer: Philip Reames <listmail@philipreames.com> 2019-06-17 20:32:22 +0000
commit: fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109 (patch)
tree: 891fdead1166bf0afdc5c874977b6570f5ca24d5 /llvm/lib
parent: abccb1ad896f27f82cfad160ccd582171575fefb (diff)
download: bcm5719-llvm-fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109.tar.gz
bcm5719-llvm-fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109.zip
1 files changed, 85 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7c0dc9b99bf..558ea27659c 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
@@ -42,6 +43,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -2077,6 +2079,48 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
   return Phi != getLoopPhiForCounter(IncV, L);
 }
 
+/// Return true if undefined behavior would provable be executed on the path to
+/// OnPathTo if Root produced a posion result.  Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison.  This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist.  Note that a false result conveys no information.  
+static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+                                          Instruction *OnPathTo, 
+                                          DominatorTree *DT) {
+  // Basic approach is to assume Root is poison, propagate poison forward
+  // through all users we can easily track, and then check whether any of those
+  // users are provable UB and must execute before out exiting block might
+  // exit.
+
+  // The set of all recursive users we've visited (which are assumed to all be
+  // poison because of said visit)
+  SmallSet<const Value *, 16> KnownPoison;
+  SmallVector<const Instruction*, 16> Worklist;
+  Worklist.push_back(Root);
+  while (!Worklist.empty()) {
+    const Instruction *I = Worklist.pop_back_val();
+
+    // If we know this must trigger UB on a path leading our target.
+    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+      return true;
+    
+    // If we can't analyze propagation through this instruction, just skip it
+    // and transitive users.  Safe as false is a conservative result.
+    if (!propagatesFullPoison(I) && I != Root)
+      continue;
+
+    if (KnownPoison.insert(I).second)
+      for (const User *User : I->users())
+        Worklist.push_back(cast<Instruction>(User));
+  }
+
+  // Might be non-UB, or might have a path we couldn't prove must execute on
+  // way to exiting bb. 
+  return false;
+}
+
 /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
 /// down to checking that all operands are constant and listing instructions
 /// that may hide undef.
@@ -2165,7 +2209,8 @@ static bool isLoopCounter(PHINode* Phi, Loop *L,
 /// valid count without scaling the address stride, so it remains a pointer
 /// expression as far as SCEV is concerned.
 static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
-                                const SCEV *BECount, ScalarEvolution *SE) {
+                                const SCEV *BECount,
+                                ScalarEvolution *SE, DominatorTree *DT) {
   uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
 
   Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
@@ -2210,6 +2255,18 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
             continue;
     }
 
+    // Avoid introducing undefined behavior due to poison which didn't exist in
+    // the original program.  (Annoyingly, the rules for poison and undef
+    // propagation are distinct, so this does NOT cover the undef case above.)
+    // We have to ensure that we don't introduce UB by introducing a use on an
+    // iteration where said IV produces poison.  Our strategy here differs for
+    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
+    // see code in linearFunctionTestReplace.  For pointers, we restrict
+    // transforms as there is no good way to reinfer inbounds once lost.
+    if (!Phi->getType()->isIntegerTy() &&
+        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
+      continue;
+    
     const SCEV *Init = AR->getStart();
 
     if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
@@ -2338,15 +2395,32 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
   // compare against the post-incremented value, otherwise we must compare
   // against the preincremented value.
   if (ExitingBB == L->getLoopLatch()) {
-    // Add one to the "backedge-taken" count to get the trip count.
-    // This addition may overflow, which is valid as long as the comparison is
-    // truncated to BackedgeTakenCount->getType().
-    IVCount = SE->getAddExpr(BackedgeTakenCount,
-                             SE->getOne(BackedgeTakenCount->getType()));
-    // The BackedgeTaken expression contains the number of times that the
-    // backedge branches to the loop header.  This is one less than the
-    // number of times the loop executes, so use the incremented indvar.
-    CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
+    bool SafeToPostInc = IndVar->getType()->isIntegerTy();
+    if (!SafeToPostInc) {
+      // For pointer IVs, we chose to not strip inbounds which requires us not
+      // to add a potentially UB introducing use.  We need to either a) show
+      // the loop test we're modifying is already in post-inc form, or b) show
+      // that adding a use must not introduce UB.
+      Instruction *Inc =
+        cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
+      if (ICmpInst *LoopTest = getLoopTest(L, ExitingBB))
+        SafeToPostInc = LoopTest->getOperand(0) == Inc ||
+          LoopTest->getOperand(1) == Inc;
+      if (!SafeToPostInc)
+        SafeToPostInc =
+          mustExecuteUBIfPoisonOnPathTo(Inc, ExitingBB->getTerminator(), DT);
+    }
+    if (SafeToPostInc) {
+      // Add one to the "backedge-taken" count to get the trip count.
+      // This addition may overflow, which is valid as long as the comparison
+      // is truncated to BackedgeTakenCount->getType().
+      IVCount = SE->getAddExpr(BackedgeTakenCount,
+                               SE->getOne(BackedgeTakenCount->getType()));
+      // The BackedgeTaken expression contains the number of times that the
+      // backedge branches to the loop header.  This is one less than the
+      // number of times the loop executes, so use the incremented indvar.
+      CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBB);
+    }
   }
 
   // It may be necessary to drop nowrap flags on the incrementing instruction
@@ -2646,7 +2720,7 @@ bool IndVarSimplify::run(Loop *L) {
       if (BETakenCount->isZero())
         continue;
       
-      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE);
+      PHINode *IndVar = FindLoopCounter(L, ExitingBB, BETakenCount, SE, DT);
       if (!IndVar)
         continue;
author	Philip Reames <listmail@philipreames.com>	2019-06-17 20:32:22 +0000
committer	Philip Reames <listmail@philipreames.com>	2019-06-17 20:32:22 +0000
commit	fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109 (patch)
tree	891fdead1166bf0afdc5c874977b6570f5ca24d5 /llvm/lib
parent	abccb1ad896f27f82cfad160ccd582171575fefb (diff)
download	bcm5719-llvm-fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109.tar.gz bcm5719-llvm-fe8bd96ebd6c490ea0b5c1fb342db2d7c393a109.zip