diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 74 |
1 file changed, 55 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b2cd55c61af..e2e63eb4c18 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1013,6 +1013,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + + // These might be better off as horizontal vector ops. + setOperationAction(ISD::ADD, MVT::i16, Custom); + setOperationAction(ISD::ADD, MVT::i32, Custom); + setOperationAction(ISD::SUB, MVT::i16, Custom); + setOperationAction(ISD::SUB, MVT::i32, Custom); } if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { @@ -18487,21 +18493,28 @@ static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, /// Depending on uarch and/or optimizing for size, we might prefer to use a /// vector operation in place of the typical scalar operation. -static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - MVT VT = Op.getSimpleValueType(); - assert((VT == MVT::f32 || VT == MVT::f64) && "Only expecting float/double"); - +static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { // If both operands have other uses, this is probably not profitable. - // Horizontal FP add/sub were added with SSE3. SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); - if ((!LHS.hasOneUse() && !RHS.hasOneUse()) || !Subtarget.hasSSE3()) + if (!LHS.hasOneUse() && !RHS.hasOneUse()) return Op; + // FP horizontal add/sub were added with SSE3. Integer with SSSE3. 
+ bool IsFP = Op.getSimpleValueType().isFloatingPoint(); + if (IsFP && !Subtarget.hasSSE3()) + return Op; + if (!IsFP && !Subtarget.hasSSSE3()) + return Op; + + // Defer forming the minimal horizontal op if the vector source has more than + // the 2 extract element uses that we're matching here. In that case, we might + // form a horizontal op that includes more than 1 add/sub op. if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - LHS.getOperand(0) != RHS.getOperand(0)) + LHS.getOperand(0) != RHS.getOperand(0) || + !LHS.getOperand(0)->hasNUsesOfValue(2, 0)) return Op; if (!isa<ConstantSDNode>(LHS.getOperand(1)) || @@ -18510,13 +18523,23 @@ static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, return Op; // Allow commuted 'hadd' ops. - // TODO: Allow commuted fsub by negating the result of FHSUB? - // TODO: This can be extended to handle other adjacent extract pairs. - auto HOpcode = Op.getOpcode() == ISD::FADD ? X86ISD::FHADD : X86ISD::FHSUB; + // TODO: Allow commuted (f)sub by negating the result of (F)HSUB? + unsigned HOpcode; + switch (Op.getOpcode()) { + case ISD::ADD: HOpcode = X86ISD::HADD; break; + case ISD::SUB: HOpcode = X86ISD::HSUB; break; + case ISD::FADD: HOpcode = X86ISD::FHADD; break; + case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; + default: + llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); + } unsigned LExtIndex = LHS.getConstantOperandVal(1); unsigned RExtIndex = RHS.getConstantOperandVal(1); - if (LExtIndex == 1 && RExtIndex == 0 && HOpcode == X86ISD::FHADD) + if (LExtIndex == 1 && RExtIndex == 0 && + (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD)) std::swap(LExtIndex, RExtIndex); + + // TODO: This can be extended to handle other adjacent extract pairs. 
if (LExtIndex != 0 || RExtIndex != 1) return Op; @@ -18533,15 +18556,23 @@ static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, if (BitWidth == 256 || BitWidth == 512) X = extract128BitVector(X, 0, DAG, DL); - // fadd (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0 - // fadd (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0 - // fsub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0 - // The extract of element 0 is free: the scalar result is element 0. + // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0 + // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0 + // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0 SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, HOp, + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp, DAG.getIntPtrConstant(0, DL)); } +/// Depending on uarch and/or optimizing for size, we might prefer to use a +/// vector operation in place of the typical scalar operation. +static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + MVT VT = Op.getSimpleValueType(); + assert((VT == MVT::f32 || VT == MVT::f64) && "Only expecting float/double"); + return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); +} + /// The only differences between FABS and FNEG are the mask and the logic op. /// FNEG also has a folding opportunity for FNEG(FABS(x)). 
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { @@ -23479,11 +23510,16 @@ static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) { DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2)); } -static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) { +static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { MVT VT = Op.getSimpleValueType(); + if (VT == MVT::i16 || VT == MVT::i32) + return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); + if (VT.getScalarType() == MVT::i1) return DAG.getNode(ISD::XOR, SDLoc(Op), VT, Op.getOperand(0), Op.getOperand(1)); + assert(Op.getSimpleValueType().is256BitVector() && Op.getSimpleValueType().isInteger() && "Only handle AVX 256-bit vector integer operation"); @@ -26219,7 +26255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDCARRY: case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::ADD: - case ISD::SUB: return LowerADD_SUB(Op, DAG); + case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget); case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: |