Diffstat (limited to 'llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
 llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp | 97
 1 file changed, 96 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7560060b760..a9442255e34 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -59,6 +59,99 @@ public:
 };
 } // namespace
 
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+    return false;
+
+  // As with the one-use checks below, this is not strictly necessary, but we
+  // are being cautious to avoid potential perf regressions on targets that
+  // do not actually have a rotate instruction (where the funnel shift would be
+  // expanded back into math/shift/logic ops).
+  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+    return false;
+
+  // Match V to funnel shift left/right and capture the source operand and
+  // shift amount in X and Y.
+  auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+    Value *L0, *L1, *R0, *R1;
+    unsigned Width = V->getType()->getScalarSizeInBits();
+    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+    auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+                                m_LShr(m_Value(R0), Sub)));
+    if (RotL.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshl;
+    }
+
+    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+    auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+                                m_Shl(m_Value(R0), Sub)));
+    if (RotR.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshr;
+    }
+
+    return Intrinsic::not_intrinsic;
+  };
+
+  // One phi operand must be a rotate operation, and the other phi operand must
+  // be the source value of that rotate operation:
+  // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+  PHINode &Phi = cast<PHINode>(I);
+  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+  Value *RotSrc, *RotAmt;
+  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+    IID = matchRotate(P1, RotSrc, RotAmt);
+    if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+      return false;
+    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+           "Pattern must match funnel shift left or right");
+  }
+
+  // The incoming block with our source operand must be the "guard" block.
+  // That must contain a cmp+branch to avoid the rotate when the shift amount
+  // is equal to 0. The other incoming block is the block with the rotate.
+  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+  Instruction *TermI = GuardBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+                         TrueBB, FalseBB)))
+    return false;
+
+  BasicBlock *PhiBB = Phi.getParent();
+  if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+    return false;
+
+  // We matched a variation of this IR pattern:
+  // GuardBB:
+  //   %cmp = icmp eq i32 %RotAmt, 0
+  //   br i1 %cmp, label %PhiBB, label %RotBB
+  // RotBB:
+  //   %sub = sub i32 32, %RotAmt
+  //   %shr = lshr i32 %X, %sub
+  //   %shl = shl i32 %X, %RotAmt
+  //   %rot = or i32 %shr, %shl
+  //   br label %PhiBB
+  // PhiBB:
+  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+  // -->
+  // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+  return true;
+}
+
 /// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
 /// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
 /// of 'and' ops, then we also need to capture the fact that we saw an
@@ -174,8 +267,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
     // Also, we want to avoid matching partial patterns.
     // TODO: It would be more efficient if we removed dead instructions
     // iteratively in this loop rather than waiting until the end.
-    for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
       MadeChange |= foldAnyOrAllBitsSet(I);
+      MadeChange |= foldGuardedRotateToFunnelShift(I);
+    }
   }
 
   // We're done with transforms, so remove dead instructions.
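
For context, the kind of source code that produces this guarded-rotate IR looks like the sketch below (illustrative only; the function name rotl32 and its body are not part of this patch). In C and C++, shifting a 32-bit value by 32 bits is undefined behavior, so a portable rotate has to branch around the shift pair when the rotate amount is zero, and front ends lower that branch into exactly the GuardBB/RotBB/PhiBB shape matched above.

  // Hypothetical guarded-rotate idiom, not from the patch. The N == 0 guard
  // avoids the undefined full-width shift X >> (32 - 0).
  unsigned rotl32(unsigned X, unsigned N) {
    if (N == 0)
      return X;                          // guard block: skip the rotate
    return (X << N) | (X >> (32 - N));   // rotate block: left-rotate by N
  }

After the fold, the compare, branch, shifts, or, and phi collapse into a single call to the llvm.fshl intrinsic, which is defined for every shift amount and can lower to one hardware rotate instruction when both value operands are the same.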