summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp59
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ac49ba02351..de666798b03 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34240,6 +34240,62 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// Reading FP element 0 of a vector is free: turn an extract of a vector FP op
+/// into the scalar op on element 0 of each operand. Empty SDValue if no match.
+static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
+  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
+  SDValue Src = ExtElt->getOperand(0);
+  SDValue Lane = ExtElt->getOperand(1);
+  EVT EltVT = ExtElt->getValueType(0);
+  EVT SrcEltVT = Src.getValueType().getScalarType();
+
+  if (EltVT != MVT::f32 && EltVT != MVT::f64)
+    return SDValue();
+
+  // TODO: If this is a unary/expensive/expand op, allow extraction from a
+  // non-zero element because the shuffle+scalar op will be cheaper?
+  if (!isNullConstant(Lane) || !Src.hasOneUse() || SrcEltVT != EltVT)
+    return SDValue();
+
+  // TODO: This switch could include FNEG, the x86-specific FP logic ops
+  // (FAND, FANDN, FOR, FXOR), FRSQRT/FRCP and other FP math ops. But that may
+  // require enhancements to avoid missed load folding and fma+fneg combining.
+  switch (Src.getOpcode()) {
+  default:
+    return SDValue();
+  case ISD::FMA: // Begin 3 operands
+  case ISD::FMAD:
+  case ISD::FADD: // Begin 2 operands
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FCOPYSIGN:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::FMINNUM_IEEE:
+  case ISD::FMAXNUM_IEEE:
+  case ISD::FMAXIMUM:
+  case ISD::FMINIMUM:
+  case ISD::FABS: // Begin 1 operand
+  case ISD::FSQRT:
+  case ISD::FRINT:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FNEARBYINT:
+  case ISD::FROUND:
+  case ISD::FFLOOR: {
+    // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ...
+    SDLoc DL(ExtElt);
+    SmallVector<SDValue, 4> ScalarOps;
+    for (unsigned I = 0, E = Src.getNumOperands(); I != E; ++I)
+      ScalarOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+                                      Src.getOperand(I), Lane));
+    return DAG.getNode(Src.getOpcode(), DL, EltVT, ScalarOps);
+  }
+  }
+  llvm_unreachable("All opcodes should return within switch");
+}
+
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
@@ -34310,6 +34366,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget))
return MinMax;
+ if (SDValue V = scalarizeExtEltFP(N, DAG))
+ return V;
+
return SDValue();
}
OpenPOWER on IntegriCloud