summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp74
1 file changed, 55 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b2cd55c61af..e2e63eb4c18 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1013,6 +1013,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+
+ // These might be better off as horizontal vector ops.
+ setOperationAction(ISD::ADD, MVT::i16, Custom);
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::SUB, MVT::i16, Custom);
+ setOperationAction(ISD::SUB, MVT::i32, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
@@ -18487,21 +18493,28 @@ static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG,
/// Depending on uarch and/or optimizing for size, we might prefer to use a
/// vector operation in place of the typical scalar operation.
-static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- MVT VT = Op.getSimpleValueType();
- assert((VT == MVT::f32 || VT == MVT::f64) && "Only expecting float/double");
-
+static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
// If both operands have other uses, this is probably not profitable.
- // Horizontal FP add/sub were added with SSE3.
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if ((!LHS.hasOneUse() && !RHS.hasOneUse()) || !Subtarget.hasSSE3())
+ if (!LHS.hasOneUse() && !RHS.hasOneUse())
return Op;
+ // FP horizontal add/sub were added with SSE3. Integer with SSSE3.
+ bool IsFP = Op.getSimpleValueType().isFloatingPoint();
+ if (IsFP && !Subtarget.hasSSE3())
+ return Op;
+ if (!IsFP && !Subtarget.hasSSSE3())
+ return Op;
+
+ // Defer forming the minimal horizontal op if the vector source has more than
+ // the 2 extract element uses that we're matching here. In that case, we might
+ // form a horizontal op that includes more than 1 add/sub op.
if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- LHS.getOperand(0) != RHS.getOperand(0))
+ LHS.getOperand(0) != RHS.getOperand(0) ||
+ !LHS.getOperand(0)->hasNUsesOfValue(2, 0))
return Op;
if (!isa<ConstantSDNode>(LHS.getOperand(1)) ||
@@ -18510,13 +18523,23 @@ static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
return Op;
// Allow commuted 'hadd' ops.
- // TODO: Allow commuted fsub by negating the result of FHSUB?
- // TODO: This can be extended to handle other adjacent extract pairs.
- auto HOpcode = Op.getOpcode() == ISD::FADD ? X86ISD::FHADD : X86ISD::FHSUB;
+ // TODO: Allow commuted (f)sub by negating the result of (F)HSUB?
+ unsigned HOpcode;
+ switch (Op.getOpcode()) {
+ case ISD::ADD: HOpcode = X86ISD::HADD; break;
+ case ISD::SUB: HOpcode = X86ISD::HSUB; break;
+ case ISD::FADD: HOpcode = X86ISD::FHADD; break;
+ case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
+ default:
+ llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
+ }
unsigned LExtIndex = LHS.getConstantOperandVal(1);
unsigned RExtIndex = RHS.getConstantOperandVal(1);
- if (LExtIndex == 1 && RExtIndex == 0 && HOpcode == X86ISD::FHADD)
+ if (LExtIndex == 1 && RExtIndex == 0 &&
+ (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
std::swap(LExtIndex, RExtIndex);
+
+ // TODO: This can be extended to handle other adjacent extract pairs.
if (LExtIndex != 0 || RExtIndex != 1)
return Op;
@@ -18533,15 +18556,23 @@ static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
if (BitWidth == 256 || BitWidth == 512)
X = extract128BitVector(X, 0, DAG, DL);
- // fadd (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
- // fadd (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
- // fsub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
- // The extract of element 0 is free: the scalar result is element 0.
+ // add (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hadd X, X), 0
+ // add (extractelt (X, 1), extractelt (X, 0)) --> extractelt (hadd X, X), 0
+ // sub (extractelt (X, 0), extractelt (X, 1)) --> extractelt (hsub X, X), 0
SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, HOp,
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
DAG.getIntPtrConstant(0, DL));
}
+/// Depending on uarch and/or optimizing for size, we might prefer to use a
+/// vector operation in place of the typical scalar operation.
+static SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ MVT VT = Op.getSimpleValueType();
+ assert(VT == MVT::f32 || VT == MVT::f64 && "Only expecting float/double");
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+}
+
/// The only differences between FABS and FNEG are the mask and the logic op.
/// FNEG also has a folding opportunity for FNEG(FABS(x)).
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
@@ -23479,11 +23510,16 @@ static SDValue split512IntArith(SDValue Op, SelectionDAG &DAG) {
DAG.getNode(Op.getOpcode(), dl, NewVT, LHS2, RHS2));
}
-static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
+ if (VT == MVT::i16 || VT == MVT::i32)
+ return lowerAddSubToHorizontalOp(Op, DAG, Subtarget);
+
if (VT.getScalarType() == MVT::i1)
return DAG.getNode(ISD::XOR, SDLoc(Op), VT,
Op.getOperand(0), Op.getOperand(1));
+
assert(Op.getSimpleValueType().is256BitVector() &&
Op.getSimpleValueType().isInteger() &&
"Only handle AVX 256-bit vector integer operation");
@@ -26219,7 +26255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDCARRY:
case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
case ISD::ADD:
- case ISD::SUB: return LowerADD_SUB(Op, DAG);
+ case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
OpenPOWER on IntegriCloud