diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 34 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/shuffle_select.ll | 16 |
2 files changed, 36 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index fec6baeb69c..458b0f02e15 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1164,10 +1164,33 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) { else return nullptr; - // TODO: There are potential folds where the opcodes do not match (mul+shl). - if (B0->getOpcode() != B1->getOpcode()) + // We need matching binops to fold the lanes together. + BinaryOperator::BinaryOps Opc0 = B0->getOpcode(); + BinaryOperator::BinaryOps Opc1 = B1->getOpcode(); + bool DropNSW = false; + if (ConstantsAreOp1 && Opc0 != Opc1) { + // If we have multiply and shift-left-by-constant, convert the shift: + // shl X, C --> mul X, 1 << C + // TODO: We drop "nsw" if shift is converted into multiply because it may + // not be correct when the shift amount is BitWidth - 1. We could examine + // each vector element to determine if it is safe to keep that flag. + if (Opc0 == Instruction::Mul && Opc1 == Instruction::Shl) { + C1 = ConstantExpr::getShl(ConstantInt::get(C1->getType(), 1), C1); + Opc1 = Instruction::Mul; + DropNSW = true; + } else if (Opc0 == Instruction::Shl && Opc1 == Instruction::Mul) { + C0 = ConstantExpr::getShl(ConstantInt::get(C0->getType(), 1), C0); + Opc0 = Instruction::Mul; + DropNSW = true; + } + } + + if (Opc0 != Opc1) return nullptr; + // The opcodes must be the same. Use a new name to make that clear. + BinaryOperator::BinaryOps BOpc = Opc0; + // Remove a binop and the shuffle by rearranging the constant: // shuffle (op X, C0), (op X, C1), M --> op X, C' // shuffle (op C0, X), (op C1, X), M --> op C', X @@ -1179,13 +1202,14 @@ static Instruction *foldSelectShuffles(ShuffleVectorInst &Shuf) { if (B0->isIntDivRem()) NewC = getSafeVectorConstantForIntDivRem(NewC); - BinaryOperator::BinaryOps Opc = B0->getOpcode(); - Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(Opc, X, NewC) : - BinaryOperator::Create(Opc, NewC, X); + Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, X, NewC) : + BinaryOperator::Create(BOpc, NewC, X); // Flags are intersected from the 2 source binops. NewBO->copyIRFlags(B0); NewBO->andIRFlags(B1); + if (DropNSW) + NewBO->setHasNoSignedWrap(false); return NewBO; } diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll index a6201dd3853..767627d2154 100644 --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -502,14 +502,11 @@ define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) { ret <4 x double> %t3 } -; FIXME: ; Shift-left with constant shift amount can be converted to mul to enable the fold. define <4 x i32> @mul_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_shl( -; CHECK-NEXT: [[T1:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 undef, i32 undef, i32 3, i32 4> -; CHECK-NEXT: [[T2:%.*]] = shl nuw <4 x i32> [[V0]], <i32 5, i32 6, i32 7, i32 8> -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 5, i32 2, i32 3> +; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -518,11 +515,11 @@ define <4 x i32> @mul_shl(<4 x i32> %v0) { ret <4 x i32> %t3 } +; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved. + define <4 x i32> @shl_mul(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul( -; CHECK-NEXT: [[T1:%.*]] = shl nsw <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4> -; CHECK-NEXT: [[T2:%.*]] = mul nsw <4 x i32> [[V0]], <i32 5, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 4, i32 undef, i32 2, i32 3> +; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -536,8 +533,7 @@ define <4 x i32> @shl_mul(<4 x i32> %v0) { define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) { ; CHECK-LABEL: @mul_is_nop_shl( -; CHECK-NEXT: [[T2:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8> -; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8> ; CHECK-NEXT: ret <4 x i32> [[T3]] ; %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4> @@ -546,6 +542,8 @@ define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) { ret <4 x i32> %t3 } +; Negative test: shift amount (operand 1) must be constant. + define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) { ; CHECK-LABEL: @shl_mul_not_constant_shift_amount( ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]] |