diff options
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 10 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/vec_shuffle.ll | 9 |
2 files changed, 12 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index a87e323fd4b..c391034dc00 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -918,6 +918,13 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { + // Bail out if we would create longer vector ops. We could allow creating + // longer vector ops, but that may result in more expensive codegen. We + // would also need to limit the transform to avoid undefined behavior for + // integer div/rem. + Type *ITy = I->getType(); + if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) + return false; for (Value *Operand : I->operands()) { if (!canEvaluateShuffled(Operand, Mask, Depth - 1)) return false; @@ -1464,8 +1471,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (isRHSID) return replaceInstUsesWith(SVI, RHS); } - if (isa<UndefValue>(RHS) && !SVI.increasesLength() && - canEvaluateShuffled(LHS, Mask)) { + if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); } diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll index b3f2ca54be9..7a5ff3628d2 100644 --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -218,14 +218,13 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) { ret <3 x i32> %ext } -; TODO: Increasing length of insertelements (no math ops) is a good canonicalization. +; Increasing length of insertelements (no math ops) is a good canonicalization. define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) { ; CHECK-LABEL: @fold_inselts_with_widening_shuffle( -; CHECK-NEXT: [[INS0:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0 -; CHECK-NEXT: [[INS1:%.*]] = insertelement <2 x i8> [[INS0]], i8 [[Y:%.*]], i32 1 -; CHECK-NEXT: [[WIDEN:%.*]] = shufflevector <2 x i8> [[INS1]], <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef> -; CHECK-NEXT: ret <3 x i8> [[WIDEN]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1 +; CHECK-NEXT: ret <3 x i8> [[TMP2]] ; %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0 %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1 |