[X86] Teach lowerV4I32Shuffle to only use broadcasts if the mask has more than one non-undef element. Prioritize shifts over broadcast in lowerV8I16Shuffle.

The motivating cases are the changes in vector-reduce-add.ll, where we were doing extra work in the scalar domain instead of shuffling. There may be a one-use check that needs to be looked into there, but this patch sidesteps the issue by avoiding broadcasts that aren't really broadcasting. Differential Revision: https://reviews.llvm.org/D66071 llvm-svn: 369287
author: Craig Topper <craig.topper@intel.com> 2019-08-19 18:15:50 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-08-19 18:15:50 +0000
commit: a0d92c72620c49aa36b1738a272a2715f7909a6a (patch)
tree: 17423b1397d57b7dee76e2b5f5ec93536b512103 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent: a8abe1f82899847e29c4f1d66c32fad17dacb62f (diff)
download: bcm5719-llvm-a0d92c72620c49aa36b1738a272a2715f7909a6a.tar.gz
bcm5719-llvm-a0d92c72620c49aa36b1738a272a2715f7909a6a.zip
1 files changed, 11 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f0a4cf2aef8..a519c200e49 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13114,10 +13114,12 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
   int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
 
   if (NumV2Elements == 0) {
-    // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
-                                                    Mask, Subtarget, DAG))
-      return Broadcast;
+    // Try to use broadcast unless the mask only has one non-undef element.
+    if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
+      if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
+                                                      Mask, Subtarget, DAG))
+        return Broadcast;
+    }
 
     // Straight shuffle of a single input vector. For everything from SSE2
     // onward this has a single fast instruction with no scary immediates.
@@ -13798,16 +13800,16 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
   int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
 
   if (NumV2Inputs == 0) {
-    // Check for being able to broadcast a single element.
-    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
-                                                    Mask, Subtarget, DAG))
-      return Broadcast;
-
     // Try to use shift instructions.
     if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
                                             Zeroable, Subtarget, DAG))
       return Shift;
 
+    // Check for being able to broadcast a single element.
+    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
+                                                    Mask, Subtarget, DAG))
+      return Broadcast;
+
     // Use dedicated unpack instructions for masks that match their pattern.
     if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
       return V;
author	Craig Topper <craig.topper@intel.com>	2019-08-19 18:15:50 +0000
committer	Craig Topper <craig.topper@intel.com>	2019-08-19 18:15:50 +0000
commit	a0d92c72620c49aa36b1738a272a2715f7909a6a (patch)
tree	17423b1397d57b7dee76e2b5f5ec93536b512103 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent	a8abe1f82899847e29c4f1d66c32fad17dacb62f (diff)
download	bcm5719-llvm-a0d92c72620c49aa36b1738a272a2715f7909a6a.tar.gz bcm5719-llvm-a0d92c72620c49aa36b1738a272a2715f7909a6a.zip