diff options
| -rw-r--r-- | llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 34 | ||||
| -rw-r--r-- | llvm/test/Transforms/LoopIdiom/X86/ctlz.ll | 162 | 
2 files changed, 183 insertions, 13 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 1b423d8e7d8..3e9546fda8e 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -188,8 +188,9 @@ private:                                 PHINode *CntPhi, Value *Var);    bool recognizeAndInsertCTLZ();    void transformLoopToCountable(BasicBlock *PreCondBB, Instruction *CntInst, -                                PHINode *CntPhi, Value *Var, const DebugLoc &DL, -                                bool ZeroCheck, bool IsCntPhiUsedOutsideLoop); +                                PHINode *CntPhi, Value *Var, Instruction *DefX, +                                const DebugLoc &DL, bool ZeroCheck, +                                bool IsCntPhiUsedOutsideLoop);    /// @}  }; @@ -1316,8 +1317,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX,      return false;    // step 2: detect instructions corresponding to "x.next = x >> 1" -  // TODO: Support loops that use LShr. -  if (!DefX || DefX->getOpcode() != Instruction::AShr) +  if (!DefX || (DefX->getOpcode() != Instruction::AShr && +                DefX->getOpcode() != Instruction::LShr))      return false;    ConstantInt *Shft = dyn_cast<ConstantInt>(DefX->getOperand(1));    if (!Shft || !Shft->isOne()) @@ -1401,8 +1402,7 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {    // Make sure the initial value can't be negative otherwise the ashr in the    // loop might never reach zero which would make the loop infinite. -  // TODO: Support loops that use lshr and wouldn't need this check. -  if (!isKnownNonNegative(InitX, *DL)) +  if (DefX->getOpcode() == Instruction::AShr && !isKnownNonNegative(InitX, *DL))      return false;    // If we check X != 0 before entering the loop we don't need a zero @@ -1433,8 +1433,9 @@ bool LoopIdiomRecognize::recognizeAndInsertCTLZ() {            TargetTransformInfo::TCC_Basic)      return false; -  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX->getDebugLoc(), -                           ZeroCheck, IsCntPhiUsedOutsideLoop); +  transformLoopToCountable(PH, CntInst, CntPhi, InitX, DefX, +                           DefX->getDebugLoc(), ZeroCheck, +                           IsCntPhiUsedOutsideLoop);    return true;  } @@ -1547,7 +1548,8 @@ static CallInst *createCTLZIntrinsic(IRBuilder<> &IRBuilder, Value *Val,  /// If CntInst and DefX are not used in LOOP_BODY they will be removed.  void LoopIdiomRecognize::transformLoopToCountable(      BasicBlock *Preheader, Instruction *CntInst, PHINode *CntPhi, Value *InitX, -    const DebugLoc &DL, bool ZeroCheck, bool IsCntPhiUsedOutsideLoop) { +    Instruction *DefX, const DebugLoc &DL, bool ZeroCheck, +    bool IsCntPhiUsedOutsideLoop) {    BranchInst *PreheaderBr = cast<BranchInst>(Preheader->getTerminator());    // Step 1: Insert the CTLZ instruction at the end of the preheader block @@ -1558,10 +1560,16 @@ void LoopIdiomRecognize::transformLoopToCountable(    Builder.SetCurrentDebugLocation(DL);    Value *CTLZ, *Count, *CountPrev, *NewCount, *InitXNext; -  if (IsCntPhiUsedOutsideLoop) -    InitXNext = Builder.CreateAShr(InitX, -                                   ConstantInt::get(InitX->getType(), 1)); -  else +  if (IsCntPhiUsedOutsideLoop) { +    if (DefX->getOpcode() == Instruction::AShr) +      InitXNext = +          Builder.CreateAShr(InitX, ConstantInt::get(InitX->getType(), 1)); +    else if (DefX->getOpcode() == Instruction::LShr) +      InitXNext = +          Builder.CreateLShr(InitX, ConstantInt::get(InitX->getType(), 1)); +    else +      llvm_unreachable("Unexpected opcode!");       +  } else      InitXNext = InitX;    CTLZ = createCTLZIntrinsic(Builder, InitXNext, DL, ZeroCheck);    Count = Builder.CreateSub( diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll index 1ce13e25fe5..e1e6998f64c 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -119,6 +119,52 @@ while.end:                                        ; preds = %while.end.loopexit,  ; Here it will replace the loop -  ; assume builtin is always profitable.  ; +; int ctlz_zero_check_lshr(int n) +; { +;   int i = 0; +;   while(n) { +;     n >>= 1; +;     i++; +;   } +;   return i; +; } +; +; ALL:  entry +; ALL:  %0 = call i32 @llvm.ctlz.i32(i32 %n, i1 true) +; ALL-NEXT:  %1 = sub i32 32, %0 +; ALL:  %inc.lcssa = phi i32 [ %1, %while.body ] +; ALL:  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc.lcssa, %while.end.loopexit ] +; ALL:  ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_zero_check_lshr(i32 %n) { +entry: +  %tobool4 = icmp eq i32 %n, 0 +  br i1 %tobool4, label %while.end, label %while.body.preheader + +while.body.preheader:                             ; preds = %entry +  br label %while.body + +while.body:                                       ; preds = %while.body.preheader, %while.body +  %i.06 = phi i32 [ %inc, %while.body ], [ 0, %while.body.preheader ] +  %n.addr.05 = phi i32 [ %shr, %while.body ], [ %n, %while.body.preheader ] +  %shr = lshr i32 %n.addr.05, 1 +  %inc = add nsw i32 %i.06, 1 +  %tobool = icmp eq i32 %shr, 0 +  br i1 %tobool, label %while.end.loopexit, label %while.body + +while.end.loopexit:                               ; preds = %while.body +  br label %while.end + +while.end:                                        ; preds = %while.end.loopexit, %entry +  %i.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.end.loopexit ] +  ret i32 %i.0.lcssa +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +;  ; int ctlz(int n)  ; {  ;   n = n >= 0 ? n : -n; @@ -161,6 +207,44 @@ while.end:                                        ; preds = %while.cond  ; Here it will replace the loop -  ; assume builtin is always profitable.  ; +; int ctlz_lshr(int n) +; { +;   int i = 0; +;   while(n >>= 1) { +;     i++; +;   } +;   return i; +; } +; +; ALL:  entry +; ALL:  %0 = lshr i32 %n, 1 +; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT:  %2 = sub i32 32, %1 +; ALL-NEXT:  %3 = add i32 %2, 1 +; ALL:  %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL:  ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_lshr(i32 %n) { +entry: +  br label %while.cond + +while.cond:                                       ; preds = %while.cond, %entry +  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] +  %shr = lshr i32 %n.addr.0, 1 +  %tobool = icmp eq i32 %shr, 0 +  %inc = add nsw i32 %i.0, 1 +  br i1 %tobool, label %while.end, label %while.cond + +while.end:                                        ; preds = %while.cond +  ret i32 %i.0 +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +;  ; int ctlz_add(int n, int i0)  ; {  ;   n = n >= 0 ? n : -n; @@ -204,6 +288,45 @@ while.end:                                        ; preds = %while.cond  ; Here it will replace the loop -  ; assume builtin is always profitable.  ; +; int ctlz_add_lshr(int n, int i0) +; { +;   int i = i0; +;   while(n >>= 1) { +;     i++; +;   } +;   return i; +; } +; +; ALL:  entry +; ALL:  %0 = lshr i32 %n, 1 +; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT:  %2 = sub i32 32, %1 +; ALL-NEXT:  %3 = add i32 %2, 1 +; ALL-NEXT:  %4 = add i32 %2, %i0 +; ALL:  %i.0.lcssa = phi i32 [ %4, %while.cond ] +; ALL:  ret i32 %i.0.lcssa +; +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_add_lshr(i32 %n, i32 %i0) { +entry: +  br label %while.cond + +while.cond:                                       ; preds = %while.cond, %entry +  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] +  %i.0 = phi i32 [ %i0, %entry ], [ %inc, %while.cond ] +  %shr = lshr i32 %n.addr.0, 1 +  %tobool = icmp eq i32 %shr, 0 +  %inc = add nsw i32 %i.0, 1 +  br i1 %tobool, label %while.end, label %while.cond + +while.end:                                        ; preds = %while.cond +  ret i32 %i.0 +} + +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +;  ; int ctlz_sext(short in)  ; {  ;   int n = in; @@ -245,6 +368,45 @@ while.end:                                        ; preds = %while.cond    ret i32 %i.0  } +; Recognize CTLZ builtin pattern. +; Here it will replace the loop - +; assume builtin is always profitable. +; +; int ctlz_sext_lshr(short in) +; { +;   int i = 0; +;   while(in >>= 1) { +;     i++; +;   } +;   return i; +; } +; +; ALL:  entry +; ALL:  %0 = lshr i32 %n, 1 +; ALL-NEXT:  %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 false) +; ALL-NEXT:  %2 = sub i32 32, %1 +; ALL-NEXT:  %3 = add i32 %2, 1 +; ALL:  %i.0.lcssa = phi i32 [ %2, %while.cond ] +; ALL:  ret i32 %i.0.lcssa + +; Function Attrs: norecurse nounwind readnone uwtable +define i32 @ctlz_sext_lshr(i16 %in) { +entry: +  %n = sext i16 %in to i32 +  br label %while.cond + +while.cond:                                       ; preds = %while.cond, %entry +  %n.addr.0 = phi i32 [ %n, %entry ], [ %shr, %while.cond ] +  %i.0 = phi i32 [ 0, %entry ], [ %inc, %while.cond ] +  %shr = lshr i32 %n.addr.0, 1 +  %tobool = icmp eq i32 %shr, 0 +  %inc = add nsw i32 %i.0, 1 +  br i1 %tobool, label %while.end, label %while.cond + +while.end:                                        ; preds = %while.cond +  ret i32 %i.0 +} +  ; This loop contains a volatile store. If x is initially negative,  ; the code will be an infinite loop because the ashr will eventually produce  ; all ones and continue doing so. This prevents the loop from terminating. If  | 

