[X86] Create PSUBUS from (add (umax X, C), -C)

InstCombine seems to canonicalize our PSUBUS pattern into a umax with the constant and an add with the negation of the constant. This patch recognizes this pattern and turns it into PSUBUS. Future work could improve undef element handling. Fixes some of PR40053. Differential Revision: https://reviews.llvm.org/D55780. llvm-svn: 349519
author: Craig Topper <craig.topper@intel.com> 2018-12-18 18:26:25 +0000
committer: Craig Topper <craig.topper@intel.com> 2018-12-18 18:26:25 +0000
commit: 20a6db5a84ce8947d188becadd04f80ec353e805 (patch)
tree: ad9e00feaf24d0afbfc27d01fcd92a4f21b2fb13 /llvm/lib/Target
parent: b536bf52997cb764b24f9121212aa527b78571a6 (diff)
download: bcm5719-llvm-20a6db5a84ce8947d188becadd04f80ec353e805.tar.gz
bcm5719-llvm-20a6db5a84ce8947d188becadd04f80ec353e805.zip
1 files changed, 44 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cc044e1dbf6..7d131661649 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34108,6 +34108,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
           // If the RHS is a constant we have to reverse the const
           // canonicalization.
           // x > C-1 ? x+-C : 0 --> subus x, C
+          // TODO: Handle build_vectors with undef elements.
           auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
             return Cond->getAPIntValue() == (-Op->getAPIntValue() - 1);
           };
@@ -40611,6 +40612,46 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                           PMADDBuilder);
 }
 
+// Try to turn (add (umax X, C), -C) into (psubus X, C); returns an empty SDValue when N does not match.
+static SDValue combineAddToSUBUS(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget &Subtarget) {
+  if (!Subtarget.hasSSE2())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  // psubus is available in SSE2 for i8 and i16 vectors; also require a power-of-2 element count >= 2.
+  if (!VT.isVector() || VT.getVectorNumElements() < 2 ||
+      !isPowerOf2_32(VT.getVectorNumElements()) ||
+      !(VT.getVectorElementType() == MVT::i8 ||
+        VT.getVectorElementType() == MVT::i16))
+    return SDValue();
+
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() != ISD::UMAX)
+    return SDValue();
+
+  // The add should have a constant that is the negative of the max constant (Max == -Op).
+  // TODO: Handle build_vectors with undef elements.
+  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+    return Max->getAPIntValue() == (-Op->getAPIntValue());
+  };
+  if (!ISD::matchBinaryPredicate(Op0.getOperand(1), Op1, MatchUSUBSAT))
+    return SDValue();
+
+  auto USUBSATBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+                           ArrayRef<SDValue> Ops) {
+    return DAG.getNode(ISD::USUBSAT, DL, Ops[0].getValueType(), Ops);
+  };
+
+  // Take both operands from the umax node: the replacement is USUBSAT(X, C), built through SplitOpsAndApply.
+  SDLoc DL(N);
+  return SplitOpsAndApply(DAG, Subtarget, DL, VT,
+                          { Op0.getOperand(0), Op0.getOperand(1) },
+                          USUBSATBuilder);
+}
+
 // Attempt to turn this pattern into PMADDWD.
 // (mul (add (zext (build_vector)), (zext (build_vector))),
 //      (add (zext (build_vector)), (zext (build_vector)))
@@ -40766,6 +40807,9 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = combineIncDecVector(N, DAG))
     return V;
 
+  if (SDValue V = combineAddToSUBUS(N, DAG, Subtarget))
+    return V;
+
   return combineAddOrSubToADCOrSBB(N, DAG);
 }
author	Craig Topper <craig.topper@intel.com>	2018-12-18 18:26:25 +0000
committer	Craig Topper <craig.topper@intel.com>	2018-12-18 18:26:25 +0000
commit	20a6db5a84ce8947d188becadd04f80ec353e805 (patch)
tree	ad9e00feaf24d0afbfc27d01fcd92a4f21b2fb13 /llvm/lib/Target
parent	b536bf52997cb764b24f9121212aa527b78571a6 (diff)
download	bcm5719-llvm-20a6db5a84ce8947d188becadd04f80ec353e805.tar.gz bcm5719-llvm-20a6db5a84ce8947d188becadd04f80ec353e805.zip