diff options
author | Craig Topper <craig.topper@intel.com> | 2018-07-11 22:35:28 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-07-11 22:35:28 +0000 |
commit | ed6acde8cf2f764fce5df495d5f2a24bc08a3e80 (patch) | |
tree | 6ecfd4f8a97bb039321735a45765faff916a0afd | |
parent | e18c2d2ce3b967b0bd495d2840cb640b95c80896 (diff) | |
download | bcm5719-llvm-ed6acde8cf2f764fce5df495d5f2a24bc08a3e80.tar.gz bcm5719-llvm-ed6acde8cf2f764fce5df495d5f2a24bc08a3e80.zip |
[LoopIdiomRecognize] Don't convert a do while loop to ctlz.
This commit suppresses turning loops like this into "(bitwidth - ctlz(input))".
unsigned foo(unsigned input) {
unsigned num = 0;
do {
++num;
input >>= 1;
} while (input != 0);
return num;
}
The loop version returns a value of 1 for both an input of 0 and an input of 1. Converting to a naive ctlz does not preserve that.
Theoretically we could do better if we checked isKnownNonZero or we could insert a select to handle the divergence. But until we have motivating cases for that, this is the easiest solution.
llvm-svn: 336864
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 25 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopIdiom/X86/ctlz.ll | 23 |
2 files changed, 30 insertions, 18 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 3e9546fda8e..d8692198f7a 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1405,16 +1405,21 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() { if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL)) return false; - // If we check X != 0 before entering the loop we don't need a zero - // check in CTLZ intrinsic, but only if Cnt Phi is not used outside of the - // loop (if it is used we count CTLZ(X >> 1)). - if (!IsCntPhiUsedOutsideLoop) - if (BasicBlock *PreCondBB = PH->getSinglePredecessor()) - if (BranchInst *PreCondBr = - dyn_cast<BranchInst>(PreCondBB->getTerminator())) { - if (matchCondition(PreCondBr, PH) == InitX) - ZeroCheck = true; - } + // If we are using the count instruction outside the loop, make sure we + // have a zero check as a precondition. Without the check the loop would run + // one iteration for before any check of the input value. This means 0 and 1 + // would have identical behavior in the original loop and thus + if (!IsCntPhiUsedOutsideLoop) { + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast<BranchInst>(PreCondBB->getTerminator()); + if (!PreCondBI) + return false; + if (matchCondition(PreCondBI, PH) != InitX) + return false; + ZeroCheck = true; + } // Check if CTLZ intrinsic is profitable. Assume it is always profitable // if we delete the loop (the loop has only 6 instructions): diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll index b863856f206..12f1043475c 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -483,7 +483,7 @@ while.end: ; preds = %while.cond.while.en ret i32 %cnt.0.lcssa } -; FIXME: We should not transform this loop. It returns 1 for an input of both +; We can't easily transform this loop. It returns 1 for an input of both ; 0 and 1. ; ; int ctlz_bad(unsigned n) @@ -496,15 +496,22 @@ while.end: ; preds = %while.cond.while.en ; return i; ; } ; -; ALL: entry -; ALL-NEXT: %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 false) -; ALL-NEXT: %1 = sub i32 32, %0 -; ALL-NEXT: br label %while.cond -; ALL: %inc.lcssa = phi i32 [ %1, %while.cond ] -; ALL: ret i32 %inc.lcssa - ; Function Attrs: norecurse nounwind readnone uwtable define i32 @ctlz_bad(i32 %n) { +; ALL-LABEL: @ctlz_bad( +; ALL-NEXT: entry: +; ALL-NEXT: br label [[WHILE_COND:%.*]] +; ALL: while.cond: +; ALL-NEXT: [[N_ADDR_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[SHR:%.*]], [[WHILE_COND]] ] +; ALL-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[WHILE_COND]] ] +; ALL-NEXT: [[SHR]] = lshr i32 [[N_ADDR_0]], 1 +; ALL-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[SHR]], 0 +; ALL-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 +; ALL-NEXT: br i1 [[TOBOOL]], label [[WHILE_END:%.*]], label [[WHILE_COND]] +; ALL: while.end: +; ALL-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[WHILE_COND]] ] +; ALL-NEXT: ret i32 [[INC_LCSSA]] +; entry: br label %while.cond |