[x86] scalarize extract element 0 of FP math

This is another step towards ensuring that we produce the optimal code for reductions, but there are other potential benefits as seen in the tests diffs: 1. Memory loads may get scalarized resulting in more efficient code. 2. Memory stores may get scalarized resulting in more efficient code. 3. Complex ops like fdiv/sqrt get scalarized which may be faster instructions depending on uarch. 4. Even simple ops like addss/subss/mulss/roundss may result in faster operation/less frequency throttling when scalarized depending on uarch. The TODO comment suggests 1 or more follow-ups for opcodes that can currently result in regressions. Differential Revision: https://reviews.llvm.org/D58282 llvm-svn: 355130
author: Sanjay Patel <spatel@rotateright.com> 2019-02-28 19:47:04 +0000
committer: Sanjay Patel <spatel@rotateright.com> 2019-02-28 19:47:04 +0000
commit: 7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f (patch)
tree: 8c5e21d6735a77017cca252210db17bf6c0c6584 /llvm/lib
parent: fadb22f4e2d64b614c87f48b625adaf217cc904f (diff)
download: bcm5719-llvm-7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f.tar.gz
bcm5719-llvm-7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f.zip
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ac49ba02351..de666798b03 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34240,6 +34240,62 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Extracting a scalar FP value from vector element 0 is free, so extract each
+/// operand first, then perform the math as a scalar op.
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
+  SDValue Vec = ExtElt->getOperand(0);
+  SDValue Index = ExtElt->getOperand(1);
+  EVT VT = ExtElt->getValueType(0);
+  EVT VecVT = Vec.getValueType();
+
+  // TODO: If this is a unary/expensive/expand op, allow extraction from a
+  // non-zero element because the shuffle+scalar op will be cheaper?
+  if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
+    return SDValue();
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+    return SDValue();
+
+  // TODO: This switch could include FNEG, the x86-specific FP logic ops
+  // (FAND, FANDN, FOR, FXOR), FRSQRT/FRCP and other FP math ops. But that may
+  // require enhancements to avoid missed load folding and fma+fneg combining.
+  switch (Vec.getOpcode()) {
+  case ISD::FMA: // Begin 3 operands
+  case ISD::FMAD:
+  case ISD::FADD: // Begin 2 operands
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::FMINNUM_IEEE:
+  case ISD::FMAXNUM_IEEE:
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUM:
+  case ISD::FABS: // Begin 1 operand
+  case ISD::FSQRT:
+  case ISD::FRINT:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FROUND:
+  case ISD::FFLOOR: {
+    // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
+    SDLoc DL(ExtElt);
+    SmallVector<SDValue, 4> ExtOps;
+    for (SDValue Op : Vec->ops())
+      ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
+    return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
+  }
+  default:
+    return SDValue();
+  }
+  llvm_unreachable("All opcodes should return within switch");
+}
+
 /// Detect vector gather/scatter index generation and convert it from being a
 /// bunch of shuffles and extracts into a somewhat faster sequence.
 /// For i686, the best sequence is apparently storing the value and loading
@@ -34310,6 +34366,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
   if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
     return MinMax;
 
+  if (SDValue V = scalarizeExtEltFP(N, DAG))
+    return V;
+
   return SDValue();
 }
author	Sanjay Patel <spatel@rotateright.com>	2019-02-28 19:47:04 +0000
committer	Sanjay Patel <spatel@rotateright.com>	2019-02-28 19:47:04 +0000
commit	7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f (patch)
tree	8c5e21d6735a77017cca252210db17bf6c0c6584 /llvm/lib
parent	fadb22f4e2d64b614c87f48b625adaf217cc904f (diff)
download	bcm5719-llvm-7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f.tar.gz bcm5719-llvm-7fc6ef7dd7433c1947e1a00db0c32d1c68bf385f.zip