diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-01 20:22:42 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-01 20:22:42 +0000 |
commit | eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de (patch) | |
tree | c9a808f0c1ef87ab5d4757e836f6920a34f08d8d /llvm | |
parent | cc7f567b6a465d5940774b2d78b270f5ced93187 (diff) | |
download | bcm5719-llvm-eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de.tar.gz bcm5719-llvm-eeacc40e27a75d07f360a58b18f1bf1bb3fcd4de.zip |
[InstCombine][SSE] Added support to the VPERMILVAR shuffle combine to accept UNDEF elements.
llvm-svn: 268204
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 47 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-avx.ll | 16 |
2 files changed, 35 insertions, 28 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index bb7f260b2ec..25f4c768fcd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -604,7 +604,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II, "Unexpected number of elements in shuffle mask!"); // Construct a shuffle mask from constant integers or UNDEFs. - Constant *Indexes[32] = { NULL }; + Constant *Indexes[32] = {NULL}; // Each byte in the shuffle control mask forms an index to permute the // corresponding byte in the destination operand. @@ -644,39 +644,46 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, if (!V) return nullptr; - unsigned Size = cast<VectorType>(V->getType())->getNumElements(); - assert(Size == 8 || Size == 4 || Size == 2); + auto *MaskEltTy = Type::getInt32Ty(II.getContext()); + unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); + assert(NumElts == 8 || NumElts == 4 || NumElts == 2); - // Initialize the resulting shuffle mask to all zeroes. - uint32_t Indexes[8] = { 0 }; + // Construct a shuffle mask from constant integers or UNDEFs. + Constant *Indexes[8] = {NULL}; // The intrinsics only read one or two bits, clear the rest. - for (unsigned I = 0; I < Size; ++I) { + for (unsigned I = 0; I < NumElts; ++I) { Constant *COp = V->getAggregateElement(I); - if (!COp || !isa<ConstantInt>(COp)) + if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) return nullptr; - int32_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue() & 0x3; + if (isa<UndefValue>(COp)) { + Indexes[I] = UndefValue::get(MaskEltTy); + continue; + } + + APInt Index = cast<ConstantInt>(COp)->getValue(); + Index = Index.zextOrTrunc(32).getLoBits(2); // The PD variants uses bit 1 to select per-lane element index, so // shift down to convert to generic shuffle mask index. 
if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd || II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) - Index >>= 1; - Indexes[I] = Index; - } + Index = Index.lshr(1); - // The _256 variants are a bit trickier since the mask bits always index - // into the corresponding 128 half. In order to convert to a generic - // shuffle, we have to make that explicit. - if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || - II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) { - for (unsigned I = Size / 2; I < Size; ++I) - Indexes[I] += Size / 2; + // The _256 variants are a bit trickier since the mask bits always index + // into the corresponding 128 half. In order to convert to a generic + // shuffle, we have to make that explicit. + if ((II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 || + II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) && + ((NumElts / 2) <= I)) { + Index += APInt(32, NumElts / 2); + } + + Indexes[I] = ConstantInt::get(MaskEltTy, Index); } - auto ShuffleMask = - ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size)); + auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts)); auto V1 = II.getArgOperand(0); auto V2 = UndefValue::get(V1->getType()); return Builder.CreateShuffleVector(V1, V2, ShuffleMask); diff --git a/llvm/test/Transforms/InstCombine/x86-avx.ll b/llvm/test/Transforms/InstCombine/x86-avx.ll index cb20038fae7..6590642deeb 100644 --- a/llvm/test/Transforms/InstCombine/x86-avx.ll +++ b/llvm/test/Transforms/InstCombine/x86-avx.ll @@ -117,8 +117,8 @@ define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) { define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps( -; CHECK-NEXT: [[A:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>) -; CHECK-NEXT: ret <4 x float> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, 
<4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef> +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>) ret <4 x float> %a @@ -126,8 +126,8 @@ define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) { define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_ps_256( -; CHECK-NEXT: [[A:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) -; CHECK-NEXT: ret <8 x float> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: ret <8 x float> [[TMP1]] ; %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>) ret <8 x float> %a @@ -135,8 +135,8 @@ define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) { define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd( -; CHECK-NEXT: [[A:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>) -; CHECK-NEXT: ret <2 x double> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0> +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>) ret <2 x double> %a @@ -144,8 +144,8 @@ define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) { define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) { ; CHECK-LABEL: @undef_test_vpermilvar_pd_256( -; CHECK-NEXT: [[A:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>) 
-; CHECK-NEXT: ret <4 x double> [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef> +; CHECK-NEXT: ret <4 x double> [[TMP1]] ; %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>) ret <4 x double> %a |