-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp  54
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp     5
-rw-r--r--  llvm/test/Transforms/InstCombine/rotate.ll               33
3 files changed, 61 insertions, 31 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 24a82ba9ae4..404c2ad7e6e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1810,6 +1810,57 @@ Instruction *InstCombiner::matchBSwap(BinaryOperator &Or) {
   return LastInst;
 }
 
+/// Transform UB-safe variants of bitwise rotate to the funnel shift intrinsic.
+static Instruction *matchRotate(Instruction &Or) {
+  // TODO: Can we reduce the code duplication between this and the related
+  // rotate matching code under visitSelect and visitTrunc?
+  unsigned Width = Or.getType()->getScalarSizeInBits();
+  if (!isPowerOf2_32(Width))
+    return nullptr;
+
+  // First, find an or'd pair of opposite shifts with the same shifted operand:
+  // or (lshr ShVal, ShAmt0), (shl ShVal, ShAmt1)
+  Value *Or0 = Or.getOperand(0), *Or1 = Or.getOperand(1);
+  Value *ShVal, *ShAmt0, *ShAmt1;
+  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal), m_Value(ShAmt0)))) ||
+      !match(Or1, m_OneUse(m_LogicalShift(m_Specific(ShVal), m_Value(ShAmt1)))))
+    return nullptr;
+
+  auto ShiftOpcode0 = cast<BinaryOperator>(Or0)->getOpcode();
+  auto ShiftOpcode1 = cast<BinaryOperator>(Or1)->getOpcode();
+  if (ShiftOpcode0 == ShiftOpcode1)
+    return nullptr;
+
+  // Match the shift amount operands for a rotate pattern. This always matches
+  // a subtraction on the R operand.
+  auto matchShiftAmount = [](Value *L, Value *R, unsigned Width) -> Value * {
+    // The shift amount may be masked with negation:
+    // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+    Value *X;
+    unsigned Mask = Width - 1;
+    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
+        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
+      return X;
+
+    return nullptr;
+  };
+
+  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
+  bool SubIsOnLHS = false;
+  if (!ShAmt) {
+    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, Width);
+    SubIsOnLHS = true;
+  }
+  if (!ShAmt)
+    return nullptr;
+
+  bool IsFshl = (!SubIsOnLHS && ShiftOpcode0 == BinaryOperator::Shl) ||
+                (SubIsOnLHS && ShiftOpcode1 == BinaryOperator::Shl);
+  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
+  Function *F = Intrinsic::getDeclaration(Or.getModule(), IID, Or.getType());
+  return IntrinsicInst::Create(F, { ShVal, ShVal, ShAmt });
+}
+
 /// If all elements of two constant vectors are 0/-1 and inverses, return true.
 static bool areInverseVectorBitmasks(Constant *C1, Constant *C2) {
   unsigned NumElts = C1->getType()->getVectorNumElements();
@@ -2170,6 +2221,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (Instruction *BSwap = matchBSwap(I))
     return BSwap;
 
+  if (Instruction *Rotate = matchRotate(I))
+    return Rotate;
+
   Value *X, *Y;
   const APInt *CV;
   if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
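
For reference, this is the shape matchRotate folds, written out as IR. It is a
hand-made sketch mirroring the rotl_safe_i32 test updated below; the function
name is invented for illustration:

    ; Left-rotate with both shift amounts masked to Width - 1: every shift
    ; amount stays within 0..31, so the expression is defined for all %y,
    ; including 0.
    define i32 @rotl_masked_example(i32 %x, i32 %y) {
      %negy = sub i32 0, %y
      %ymask = and i32 %y, 31          ; y & (Width - 1)
      %negymask = and i32 %negy, 31    ; -y & (Width - 1)
      %shl = shl i32 %x, %ymask
      %shr = lshr i32 %x, %negymask
      %r = or i32 %shl, %shr
      ret i32 %r
    }
    ; With this patch, instcombine folds the whole expression to:
    ;   %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %y)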
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ebbe2afe3ec..faf58a08976 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1557,8 +1557,7 @@ static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
 /// funnel shift intrinsic. Example:
 /// rotl32(a, b) --> (b == 0 ? a : ((a >> (32 - b)) | (a << b)))
 ///              --> call llvm.fshl.i32(a, a, b)
-static Instruction *foldSelectRotate(SelectInst &Sel,
-                                     InstCombiner::BuilderTy &Builder) {
+static Instruction *foldSelectRotate(SelectInst &Sel) {
   // The false value of the select must be a rotate of the true value.
   Value *Or0, *Or1;
   if (!match(Sel.getFalseValue(), m_OneUse(m_Or(m_Value(Or0), m_Value(Or1)))))
@@ -2047,7 +2046,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   if (Instruction *Select = foldSelectBinOpIdentity(SI, TLI))
     return Select;
 
-  if (Instruction *Rot = foldSelectRotate(SI, Builder))
+  if (Instruction *Rot = foldSelectRotate(SI))
     return Rot;
 
   return nullptr;
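
The select-based pattern handled by foldSelectRotate guards the rotate with a
compare, because the unmasked '32 - b' shift amount would be out of range when
b == 0. A hand-made IR sketch of that shape, following the doc comment above
(names invented for illustration):

    define i32 @rotl_select_example(i32 %x, i32 %b) {
      %cmp = icmp eq i32 %b, 0
      %sub = sub i32 32, %b
      %shr = lshr i32 %x, %sub
      %shl = shl i32 %x, %b
      %or = or i32 %shr, %shl
      %r = select i1 %cmp, i32 %x, i32 %or   ; b == 0 ? x : rotl(x, b)
      ret i32 %r
    }
    ; --> %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %b)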
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index cc6735fe393..4b516003a9f 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -213,12 +213,7 @@ define <3 x i42> @rotr_v3i42(<3 x i42> %x, <3 x i42> %y) {
 
 define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 ; CHECK-LABEL: @rotl_safe_i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i32 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i32 [[Y]], 31
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i32 [[NEGY]], 31
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i32 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Y:%.*]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %negy = sub i32 0, %y
@@ -236,12 +231,9 @@ define i32 @rotl_safe_i32(i32 %x, i32 %y) {
 define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 ; CHECK-LABEL: @rotl_safe_i16_commute_extra_use(
 ; CHECK-NEXT:    [[NEGY:%.*]] = sub i16 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i16 [[Y]], 15
 ; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i16 [[NEGY]], 15
 ; CHECK-NEXT:    store i16 [[NEGYMASK]], i16* [[P:%.*]], align 2
-; CHECK-NEXT:    [[SHL:%.*]] = shl i16 [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i16 [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i16 [[SHL]], [[SHR]]
+; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.fshl.i16(i16 [[X:%.*]], i16 [[X]], i16 [[Y]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
   %negy = sub i16 0, %y
@@ -259,12 +251,7 @@ define i16 @rotl_safe_i16_commute_extra_use(i16 %x, i16 %y, i16* %p) {
 
 define i64 @rotr_safe_i64(i64 %x, i64 %y) {
 ; CHECK-LABEL: @rotr_safe_i64(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub i64 0, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and i64 [[Y]], 63
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and i64 [[NEGY]], 63
-; CHECK-NEXT:    [[SHL:%.*]] = shl i64 [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i64 [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Y:%.*]])
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
   %negy = sub i64 0, %y
@@ -305,12 +292,7 @@ define i8 @rotr_safe_i8_commute_extra_use(i8 %x, i8 %y, i8* %p) {
 
 define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @rotl_safe_v2i32(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <2 x i32> [[Y]], <i32 31, i32 31>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <2 x i32> [[NEGY]], <i32 31, i32 31>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[X:%.*]], [[YMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[X]], [[NEGYMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i32> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> [[Y:%.*]])
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %negy = sub <2 x i32> zeroinitializer, %y
@@ -327,12 +309,7 @@ define <2 x i32> @rotl_safe_v2i32(<2 x i32> %x, <2 x i32> %y) {
 
 define <3 x i16> @rotr_safe_v3i16(<3 x i16> %x, <3 x i16> %y) {
 ; CHECK-LABEL: @rotr_safe_v3i16(
-; CHECK-NEXT:    [[NEGY:%.*]] = sub <3 x i16> zeroinitializer, [[Y:%.*]]
-; CHECK-NEXT:    [[YMASK:%.*]] = and <3 x i16> [[Y]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[NEGYMASK:%.*]] = and <3 x i16> [[NEGY]], <i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[X:%.*]], [[NEGYMASK]]
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <3 x i16> [[X]], [[YMASK]]
-; CHECK-NEXT:    [[R:%.*]] = or <3 x i16> [[SHR]], [[SHL]]
+; CHECK-NEXT:    [[R:%.*]] = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> [[X:%.*]], <3 x i16> [[X]], <3 x i16> [[Y:%.*]])
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %negy = sub <3 x i16> zeroinitializer, %y
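
Why the 'safe' variants above can become funnel shifts at all: the fshl/fshr
intrinsics take their shift amount modulo the bit width, so a zero amount
returns the first operand unchanged, exactly what the masked shl/lshr/or
expression computes when %y is 0. A small hand-made check (function name
invented for illustration):

    define i32 @fshl_zero_amt(i32 %x) {
      ; (x << (0 & 31)) | (x >> (-0 & 31)) == x | x == x, and fshl(x, x, 0) == x.
      %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 0)
      ret i32 %r
    }
    declare i32 @llvm.fshl.i32(i32, i32, i32)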