[InstCombine] allow lengthening of insertelement to eliminate shuffles

As noted in the post-commit comments for D52548, the limitation on increasing vector length can be applied per opcode. As a first step, this patch allows only insertelement to be widened, because that has no logical downsides for IR and carries little risk of pessimizing codegen. This may cause PR39132 to go into hiding during a full compile, but that bug is not fixed.

llvm-svn: 343406
author: Sanjay Patel <spatel@rotateright.com> 2018-09-30 13:50:42 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2018-09-30 13:50:42 +0000
commit: 26c119a9c2f1d6866fe5996ef5a039b4fc3749ca (patch)
tree: 8caae0b7b55785ce1fd1fd5922fe0c360fb68734
parent: 818cfc40ff464d426be37a552c3db8e895c94321 (diff)
download: bcm5719-llvm-26c119a9c2f1d6866fe5996ef5a039b4fc3749ca.tar.gz
bcm5719-llvm-26c119a9c2f1d6866fe5996ef5a039b4fc3749ca.zip
2 files changed, 12 insertions, 7 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a87e323fd4b..c391034dc00 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -918,6 +918,13 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
     case Instruction::FPTrunc:
     case Instruction::FPExt:
     case Instruction::GetElementPtr: {
+      // Bail out if we would create longer vector ops. We could allow creating
+      // longer vector ops, but that may result in more expensive codegen. We
+      // would also need to limit the transform to avoid undefined behavior for
+      // integer div/rem.
+      Type *ITy = I->getType();
+      if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements())
+        return false;
       for (Value *Operand : I->operands()) {
         if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
           return false;
@@ -1464,8 +1471,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     if (isRHSID) return replaceInstUsesWith(SVI, RHS);
   }
 
-  if (isa<UndefValue>(RHS) && !SVI.increasesLength() &&
-      canEvaluateShuffled(LHS, Mask)) {
+  if (isa<UndefValue>(RHS) && canEvaluateShuffled(LHS, Mask)) {
     Value *V = evaluateInDifferentElementOrder(LHS, Mask);
     return replaceInstUsesWith(SVI, V);
   }
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index b3f2ca54be9..7a5ff3628d2 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -218,14 +218,13 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) {
   ret <3 x i32> %ext
 }
 
-; TODO: Increasing length of insertelements (no math ops) is a good canonicalization.
+; Increasing length of insertelements (no math ops) is a good canonicalization.
 
 define <3 x i8> @fold_inselts_with_widening_shuffle(i8 %x, i8 %y) {
 ; CHECK-LABEL: @fold_inselts_with_widening_shuffle(
-; CHECK-NEXT:    [[INS0:%.*]] = insertelement <2 x i8> undef, i8 [[X:%.*]], i32 0
-; CHECK-NEXT:    [[INS1:%.*]] = insertelement <2 x i8> [[INS0]], i8 [[Y:%.*]], i32 1
-; CHECK-NEXT:    [[WIDEN:%.*]] = shufflevector <2 x i8> [[INS1]], <2 x i8> undef, <3 x i32> <i32 0, i32 1, i32 undef>
-; CHECK-NEXT:    ret <3 x i8> [[WIDEN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i8> undef, i8 [[X:%.*]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <3 x i8> [[TMP1]], i8 [[Y:%.*]], i32 1
+; CHECK-NEXT:    ret <3 x i8> [[TMP2]]
 ;
   %ins0 = insertelement <2 x i8> undef, i8 %x, i32 0
   %ins1 = insertelement <2 x i8> %ins0, i8 %y, i32 1
author	Sanjay Patel <spatel@rotateright.com>	2018-09-30 13:50:42 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2018-09-30 13:50:42 +0000
commit	26c119a9c2f1d6866fe5996ef5a039b4fc3749ca (patch)
tree	8caae0b7b55785ce1fd1fd5922fe0c360fb68734
parent	818cfc40ff464d426be37a552c3db8e895c94321 (diff)
download	bcm5719-llvm-26c119a9c2f1d6866fe5996ef5a039b4fc3749ca.tar.gz bcm5719-llvm-26c119a9c2f1d6866fe5996ef5a039b4fc3749ca.zip