summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp 34
-rw-r--r-- llvm/test/Transforms/InstCombine/shuffle_select.ll 16
2 files changed, 36 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index fec6baeb69c..458b0f02e15 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1164,10 +1164,33 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) {
else
return nullptr;
- // TODO: There are potential folds where the opcodes do not match (mul+shl).
- if (B0->getOpcode() != B1->getOpcode())
+ // We need matching binops to fold the lanes together.
+ BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
+ BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
+ bool DropNSW = false;
+ if (ConstantsAreOp1 && Opc0 != Opc1) {
+ // If we have multiply and shift-left-by-constant, convert the shift:
+ // shl X, C --> mul X, 1 << C
+ // TODO: We drop "nsw" if shift is converted into multiply because it may
+ // not be correct when the shift amount is BitWidth - 1. We could examine
+ // each vector element to determine if it is safe to keep that flag.
+ if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) {
+ C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1);
+ Opc1 = Instruction::Mul;
+ DropNSW = true;
+ } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) {
+ C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0);
+ Opc0 = Instruction::Mul;
+ DropNSW = true;
+ }
+ }
+
+ if (Opc0 != Opc1)
return nullptr;
+ // The opcodes must be the same. Use a new name to make that clear.
+ BinaryOperator::BinaryOps BOpc = Opc0;
+
// Remove a binop and the shuffle by rearranging the constant:
// shuffle (op X, C0), (op X, C1), M --> op X, C'
// shuffle (op C0, X), (op C1, X), M --> op C', X
@@ -1179,13 +1202,14 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) {
if (B0->isIntDivRem())
NewC = getSafeVectorConstantForIntDivRem(NewC);
- BinaryOperator::BinaryOps Opc = B0->getOpcode();
- Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(Opc, X, NewC) :
- BinaryOperator::Create(Opc, NewC, X);
+ Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, X, NewC) :
+ BinaryOperator::Create(BOpc, NewC, X);
// Flags are intersected from the 2 source binops.
NewBO->copyIRFlags(B0);
NewBO->andIRFlags(B1);
+ if (DropNSW)
+ NewBO->setHasNoSignedWrap(false);
return NewBO;
}
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
index a6201dd3853..767627d2154 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -502,14 +502,11 @@ define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
ret <4 x double> %t3
}
-; FIXME:
; Shift-left with constant shift amount can be converted to mul to enable the fold.
define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
-; CHECK-NEXT: [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 4>
-; CHECK-NEXT: [[T2:%.*]] = shl nuw <4 x i32> [[V0]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
%t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -518,11 +515,11 @@ define <4 x i32> @mul_shl(<4 x i32> %v0) {
ret <4 x i32> %t3
}
+; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
+
define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
-; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
-; CHECK-NEXT: [[T2:%.*]] = mul nsw <4 x i32> [[V0]], <i32 5, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
+; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
%t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -536,8 +533,7 @@ define <4 x i32> @shl_mul(<4 x i32> %v0) {
define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
-; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
%t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
@@ -546,6 +542,8 @@ define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
ret <4 x i32> %t3
}
+; Negative test: shift amount (operand 1) must be constant.
+
define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
OpenPOWER on IntegriCloud