[x86] instcombine more cases of insertps into a shufflevector

This is a follow-on to D8833 (the insertps optimization for the case where the zero mask is not used). In this patch, we handle the case where the zero mask (zmask) is used, but either both input vectors to the insertps intrinsic are the same operand or the zmask overrides the destination lane. In both situations every lane of the result is either an element of the first input vector or zero, which lets us replace the 2nd shuffle input operand with the zero vector. Differential Revision: http://reviews.llvm.org/D9257 llvm-svn: 235810
author: Sanjay Patel <spatel@rotateright.com> 2015-04-25 20:55:25 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2015-04-25 20:55:25 +0000
commit: c1d20a36fb5a6034c76a10bded21b43561ec2a3d (patch)
tree: 7eb0144ffd7a10362ecbb0ed9cfadd9f0bd6fe6c /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent: 3eb5146b3cc2c6e99943811d54f92e728c42d37b (diff)
download: bcm5719-llvm-c1d20a36fb5a6034c76a10bded21b43561ec2a3d.tar.gz
bcm5719-llvm-c1d20a36fb5a6034c76a10bded21b43561ec2a3d.zip
1 files changed, 31 insertions, 14 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 1505ff073b1..2f83cc8961b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -201,7 +201,7 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
                                   InstCombiner::BuilderTy &Builder) {
   if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
     VectorType *VecTy = cast<VectorType>(II.getType());
-    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+    assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
     
     // The immediate permute control byte looks like this:
     //    [3:0] - zero mask for each 32-bit lane
@@ -213,25 +213,42 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II,
     uint8_t DestLane = (Imm >> 4) & 0x3;
     uint8_t SourceLane = (Imm >> 6) & 0x3;
 
+    ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
+
     // If all zero mask bits are set, this was just a weird way to
     // generate a zero vector.
     if (ZMask == 0xf)
       return ZeroVector;
-    
-    // TODO: Model this case as two shuffles or a 'logical and' plus shuffle?
-    if (ZMask)
-      return nullptr;
-
-    assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
 
-    // If we're not zeroing anything, this is a single shuffle.
-    // Replace the selected destination lane with the selected source lane.
-    // For all other lanes, pass the first source bits through.
+    // Initialize by passing all of the first source bits through.
     int ShuffleMask[4] = { 0, 1, 2, 3 };
-    ShuffleMask[DestLane] = SourceLane + 4;
-    
-    return Builder.CreateShuffleVector(II.getArgOperand(0), II.getArgOperand(1),
-                                       ShuffleMask);
+
+    // We may replace the second operand with the zero vector.
+    Value *V1 = II.getArgOperand(1);
+
+    if (ZMask) {
+      // If the zero mask is being used with a single input or the zero mask
+      // overrides the destination lane, this is a shuffle with the zero vector.
+      if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
+          (ZMask & (1 << DestLane))) {
+        V1 = ZeroVector;
+        // We may still move 32-bits of the first source vector from one lane
+        // to another.
+        ShuffleMask[DestLane] = SourceLane;
+        // The zero mask may override the previous insert operation.
+        for (unsigned i = 0; i < 4; ++i)
+          if ((ZMask >> i) & 0x1)
+            ShuffleMask[i] = i + 4;
+      } else {
+        // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
+        return nullptr;
+      }
+    } else {
+      // Replace the selected destination lane with the selected source lane.
+      ShuffleMask[DestLane] = SourceLane + 4;
+    }
+  
+    return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
   }
   return nullptr;
 }
author	Sanjay Patel <spatel@rotateright.com>	2015-04-25 20:55:25 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2015-04-25 20:55:25 +0000
commit	c1d20a36fb5a6034c76a10bded21b43561ec2a3d (patch)
tree	7eb0144ffd7a10362ecbb0ed9cfadd9f0bd6fe6c /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent	3eb5146b3cc2c6e99943811d54f92e728c42d37b (diff)
download	bcm5719-llvm-c1d20a36fb5a6034c76a10bded21b43561ec2a3d.tar.gz bcm5719-llvm-c1d20a36fb5a6034c76a10bded21b43561ec2a3d.zip