Diffstat (limited to 'llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp')
-rw-r--r--  llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp  97
1 file changed, 96 insertions(+), 1 deletion(-)
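
This patch teaches AggressiveInstCombine to recognize a "guarded rotate"
and fold it into a funnel-shift intrinsic. The source-level shape it
targets is the common rotate idiom sketched below (an illustrative C/C++
example; the name rotl32 is not from the patch):

    unsigned rotl32(unsigned x, unsigned n) {
      // The branch guards the (x >> (32 - n)) shift, which would be
      // undefined behavior when n == 0.
      if (n == 0)
        return x;
      return (x << n) | (x >> (32 - n));
    }

The compare-and-branch lowers to a triangle of blocks that merges in a
phi; the new fold replaces that phi with a call to llvm.fshl/llvm.fshr,
which is well-defined for a shift amount of 0.
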
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 7560060b760..a9442255e34 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -59,6 +59,99 @@ public:
};
} // namespace
+/// Match a pattern for a bitwise rotate operation that partially guards
+/// against undefined behavior by branching around the rotation when the shift
+/// amount is 0.
+static bool foldGuardedRotateToFunnelShift(Instruction &I) {
+  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
+    return false;
+
+  // As with the one-use checks below, this is not strictly necessary, but we
+  // are being cautious to avoid potential perf regressions on targets that
+  // do not actually have a rotate instruction (where the funnel shift would be
+  // expanded back into math/shift/logic ops).
+  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
+    return false;
+
+  // Match V to funnel shift left/right and capture the source operand and
+  // shift amount in X and Y.
+  auto matchRotate = [](Value *V, Value *&X, Value *&Y) {
+    Value *L0, *L1, *R0, *R1;
+    unsigned Width = V->getType()->getScalarSizeInBits();
+    auto Sub = m_Sub(m_SpecificInt(Width), m_Value(R1));
+
+    // rotate_left(X, Y) == (X << Y) | (X >> (Width - Y))
+    auto RotL = m_OneUse(m_c_Or(m_Shl(m_Value(L0), m_Value(L1)),
+                                m_LShr(m_Value(R0), Sub)));
+    if (RotL.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshl;
+    }
+
+    // rotate_right(X, Y) == (X >> Y) | (X << (Width - Y))
+    auto RotR = m_OneUse(m_c_Or(m_LShr(m_Value(L0), m_Value(L1)),
+                                m_Shl(m_Value(R0), Sub)));
+    if (RotR.match(V) && L0 == R0 && L1 == R1) {
+      X = L0;
+      Y = L1;
+      return Intrinsic::fshr;
+    }
+
+    return Intrinsic::not_intrinsic;
+  };
+
+  // One phi operand must be a rotate operation, and the other phi operand must
+  // be the source value of that rotate operation:
+  // phi [ rotate(RotSrc, RotAmt), RotBB ], [ RotSrc, GuardBB ]
+  PHINode &Phi = cast<PHINode>(I);
+  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
+  Value *RotSrc, *RotAmt;
+  Intrinsic::ID IID = matchRotate(P0, RotSrc, RotAmt);
+  if (IID == Intrinsic::not_intrinsic || RotSrc != P1) {
+    IID = matchRotate(P1, RotSrc, RotAmt);
+    if (IID == Intrinsic::not_intrinsic || RotSrc != P0)
+      return false;
+    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
+           "Pattern must match funnel shift left or right");
+  }
+
+  // The incoming block with our source operand must be the "guard" block.
+  // That must contain a cmp+branch to avoid the rotate when the shift amount
+  // is equal to 0. The other incoming block is the block with the rotate.
+  BasicBlock *GuardBB = Phi.getIncomingBlock(RotSrc == P1);
+  BasicBlock *RotBB = Phi.getIncomingBlock(RotSrc != P1);
+  Instruction *TermI = GuardBB->getTerminator();
+  BasicBlock *TrueBB, *FalseBB;
+  ICmpInst::Predicate Pred;
+  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(RotAmt), m_ZeroInt()),
+                         TrueBB, FalseBB)))
+    return false;
+
+  BasicBlock *PhiBB = Phi.getParent();
+  if (Pred != CmpInst::ICMP_EQ || TrueBB != PhiBB || FalseBB != RotBB)
+    return false;
+
+  // We matched a variation of this IR pattern:
+  // GuardBB:
+  //   %cmp = icmp eq i32 %RotAmt, 0
+  //   br i1 %cmp, label %PhiBB, label %RotBB
+  // RotBB:
+  //   %sub = sub i32 32, %RotAmt
+  //   %shr = lshr i32 %X, %sub
+  //   %shl = shl i32 %X, %RotAmt
+  //   %rot = or i32 %shr, %shl
+  //   br label %PhiBB
+  // PhiBB:
+  //   %cond = phi i32 [ %rot, %RotBB ], [ %X, %GuardBB ]
+  // -->
+  // llvm.fshl.i32(i32 %X, i32 %X, i32 %RotAmt)
+  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
+  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
+  Phi.replaceAllUsesWith(Builder.CreateCall(F, {RotSrc, RotSrc, RotAmt}));
+  return true;
+}
+
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
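
Putting the pieces together, the fold rewrites this kind of IR (a minimal
sketch assembled from the pattern comment in the code above; the function
and value names are illustrative):

    define i32 @rotl(i32 %x, i32 %amt) {
    entry:
      %cmp = icmp eq i32 %amt, 0
      br i1 %cmp, label %end, label %rotbb
    rotbb:
      %sub = sub i32 32, %amt
      %shr = lshr i32 %x, %sub
      %shl = shl i32 %x, %amt
      %rot = or i32 %shr, %shl
      br label %end
    end:
      %cond = phi i32 [ %rot, %rotbb ], [ %x, %entry ]
      ret i32 %cond
    }

into a single funnel-shift call that subsumes the guard, since
fshl(x, x, 0) == x:

      %cond = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %amt)

The dead compare, shifts, and or are left behind for the dead-instruction
cleanup at the end of the pass.
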
@@ -174,8 +267,10 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT) {
    // Also, we want to avoid matching partial patterns.
    // TODO: It would be more efficient if we removed dead instructions
    // iteratively in this loop rather than waiting until the end.
-    for (Instruction &I : make_range(BB.rbegin(), BB.rend()))
+    for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
      MadeChange |= foldAnyOrAllBitsSet(I);
+      MadeChange |= foldGuardedRotateToFunnelShift(I);
+    }
  }
  // We're done with transforms, so remove dead instructions.
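
The replacement at the end of the new function builds the intrinsic call
with IRBuilder: a rotate is a funnel shift whose two data operands are the
same value, which is why RotSrc is passed twice. A standalone sketch of
the same construction (hypothetical helper name; the LLVM C++ APIs are the
ones the patch itself uses):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    using namespace llvm;

    // Emit rotate-left(X, Amt) as llvm.fshl(X, X, Amt). The funnel-shift
    // intrinsic takes the shift amount modulo the bit width, so no guard
    // against Amt == 0 is needed.
    static Value *createRotateLeft(IRBuilder<> &B, Value *X, Value *Amt) {
      Module *M = B.GetInsertBlock()->getModule();
      // fshl is an overloaded intrinsic; specialize the declaration on
      // X's type.
      Function *Fshl =
          Intrinsic::getDeclaration(M, Intrinsic::fshl, X->getType());
      return B.CreateCall(Fshl, {X, X, Amt});
    }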