diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 59 |
1 files changed, 59 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ac49ba02351..de666798b03 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -34240,6 +34240,62 @@ static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Extracting a scalar FP value from vector element 0 is free, so extract each +/// operand first, then perform the math as a scalar op. +static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { + assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract"); + SDValue Vec = ExtElt->getOperand(0); + SDValue Index = ExtElt->getOperand(1); + EVT VT = ExtElt->getValueType(0); + EVT VecVT = Vec.getValueType(); + + // TODO: If this is a unary/expensive/expand op, allow extraction from a + // non-zero element because the shuffle+scalar op will be cheaper? + if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT) + return SDValue(); + + if (VT != MVT::f32 && VT != MVT::f64) + return SDValue(); + + // TODO: This switch could include FNEG, the x86-specific FP logic ops + // (FAND, FANDN, FOR, FXOR), FRSQRT/FRCP and other FP math ops. But that may + // require enhancements to avoid missed load folding and fma+fneg combining. + switch (Vec.getOpcode()) { + case ISD::FMA: // Begin 3 operands + case ISD::FMAD: + case ISD::FADD: // Begin 2 operands + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FCOPYSIGN: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: + case ISD::FABS: // Begin 1 operand + case ISD::FSQRT: + case ISD::FRINT: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FFLOOR: { + // extract (fp X, Y, ...), 0 --> fp (extract X, 0), (extract Y, 0), ... + SDLoc DL(ExtElt); + SmallVector<SDValue, 4> ExtOps; + for (SDValue Op : Vec->ops()) + ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index)); + return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps); + } + default: + return SDValue(); + } + llvm_unreachable("All opcodes should return within switch"); +} + /// Detect vector gather/scatter index generation and convert it from being a /// bunch of shuffles and extracts into a somewhat faster sequence. /// For i686, the best sequence is apparently storing the value and loading @@ -34310,6 +34366,9 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG, if (SDValue MinMax = combineHorizontalMinMaxResult(N, DAG, Subtarget)) return MinMax; + if (SDValue V = scalarizeExtEltFP(N, DAG)) + return V; + return SDValue(); } |