summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp31
1 files changed, 28 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 3f466495c5e..2dea4c45cbd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -202,10 +202,35 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// shall be unset in the root value (OuterShift). If ShAmtsDiff is negative,
// we'll need to also produce a mask to unset ShAmtsDiff high bits.
// So, does *any* channel need a mask? (is ShiftShAmt u>= MaskShAmt ?)
- if (!match(ShAmtsDiff, m_NonNegative()))
- return nullptr; // FIXME.
+ if (!match(ShAmtsDiff, m_NonNegative())) {
+ // This sub-fold (with mask) is invalid for 'ashr' "masking" instruction.
+ if (match(Masked, m_AShr(m_Value(), m_Value())))
+ return nullptr;
+ // For a mask we need to get rid of old masking instruction.
+ if (!Masked->hasOneUse())
+ return nullptr; // Else we can't perform the fold.
+ Type *Ty = X->getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ // We should produce compute the mask in wider type, and truncate later!
+ // Get type twice as wide element-wise (same number of elements!).
+ Type *ExtendedScalarTy = Type::getIntNTy(Ty->getContext(), 2 * BitWidth);
+ Type *ExtendedTy =
+ Ty->isVectorTy()
+ ? VectorType::get(ExtendedScalarTy, Ty->getVectorNumElements())
+ : ExtendedScalarTy;
+ auto *ExtendedNumHighBitsToClear = ConstantExpr::getZExt(
+ ConstantExpr::getAdd(
+ ConstantExpr::getNeg(ShAmtsDiff),
+ ConstantInt::get(Ty, BitWidth, /*isSigned=*/false)),
+ ExtendedTy);
+ // And compute the mask as usual: (-1 l>> (ShAmtsDiff))
+ auto *ExtendedAllOnes = ConstantExpr::getAllOnesValue(ExtendedTy);
+ auto *ExtendedMask =
+ ConstantExpr::getLShr(ExtendedAllOnes, ExtendedNumHighBitsToClear);
+ NewMask = ConstantExpr::getTrunc(ExtendedMask, Ty);
+ } else
+ NewMask = nullptr; // No mask needed.
// All good, we can do this fold.
- NewMask = nullptr; // No mask needed.
} else
return nullptr; // Don't know anything about this pattern.
OpenPOWER on IntegriCloud