[X86] Don't use PMADDWD for vector add reductions of multiplies if the mul inputs have an additional user.

The pmaddwd transform inserts a truncate. If that truncate would end up creating additional instructions instead of making a zext narrower, then we shouldn't do it. I've restricted this check to SSE4.1 targets only, since on prior targets the zext will be done in stages, so the truncate will probably not create additional instructions. Might need some more investigation of mul shrinking and the other pmaddwd transform to be sure this is the right decision. There might be a slight regression on AVX1 targets due to add splitting; it's hard to say for sure. Maybe we need to look into using the vector reduction flag to use 2 narrow loads and a blend instead of extracting and inserting. llvm-svn: 367198
author: Craig Topper <craig.topper@intel.com> 2019-07-29 01:36:58 +0000
committer: Craig Topper <craig.topper@intel.com> 2019-07-29 01:36:58 +0000
commit: eb1beabad94f80d67f8508087af4e9bc0d59fbff (patch)
tree: a6b7168d51d69de1aa9f8697f0e9389e400fde7e /llvm/lib
parent: ac9d0f4150c8e8b452f51cd2249e177a41da863e (diff)
download: bcm5719-llvm-eb1beabad94f80d67f8508087af4e9bc0d59fbff.tar.gz
bcm5719-llvm-eb1beabad94f80d67f8508087af4e9bc0d59fbff.zip
1 file changed, 22 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 52ce9ec18e3..17481f7fb26 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43151,18 +43151,6 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
   if (!Subtarget.hasSSE2())
     return SDValue();
 
-  SDValue MulOp = N->getOperand(0);
-  SDValue OtherOp = N->getOperand(1);
-
-  if (MulOp.getOpcode() != ISD::MUL)
-    std::swap(MulOp, OtherOp);
-  if (MulOp.getOpcode() != ISD::MUL)
-    return SDValue();
-
-  ShrinkMode Mode;
-  if (!canReduceVMulWidth(MulOp.getNode(), DAG, Mode) || Mode == MULU16)
-    return SDValue();
-
   EVT VT = N->getValueType(0);
 
   // If the vector size is less than 128, or greater than the supported RegSize,
@@ -43170,6 +43158,28 @@ static SDValue combineLoopMAddPattern(SDNode *N, SelectionDAG &DAG,
   if (!VT.isVector() || VT.getVectorNumElements() < 8)
     return SDValue();
 
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+
+  auto UsePMADDWD = [&](SDValue Op) {
+    ShrinkMode Mode;
+    return Op.getOpcode() == ISD::MUL &&
+           canReduceVMulWidth(Op.getNode(), DAG, Mode) && Mode != MULU16 &&
+           (!Subtarget.hasSSE41() ||
+            (Op->isOnlyUserOf(Op.getOperand(0).getNode()) &&
+             Op->isOnlyUserOf(Op.getOperand(1).getNode())));
+  };
+
+  SDValue MulOp, OtherOp;
+  if (UsePMADDWD(Op0)) {
+    MulOp = Op0;
+    OtherOp = Op1;
+  } else if (UsePMADDWD(Op1)) {
+    MulOp = Op1;
+    OtherOp = Op0;
+  } else
+   return SDValue();
+
   SDLoc DL(N);
   EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                    VT.getVectorNumElements());
author	Craig Topper <craig.topper@intel.com>	2019-07-29 01:36:58 +0000
committer	Craig Topper <craig.topper@intel.com>	2019-07-29 01:36:58 +0000
commit	eb1beabad94f80d67f8508087af4e9bc0d59fbff (patch)
tree	a6b7168d51d69de1aa9f8697f0e9389e400fde7e /llvm/lib
parent	ac9d0f4150c8e8b452f51cd2249e177a41da863e (diff)
download	bcm5719-llvm-eb1beabad94f80d67f8508087af4e9bc0d59fbff.tar.gz bcm5719-llvm-eb1beabad94f80d67f8508087af4e9bc0d59fbff.zip