| author | Eli Friedman <efriedma@codeaurora.org> | 2017-01-11 19:33:38 +0000 |
|---|---|---|
| committer | Eli Friedman <efriedma@codeaurora.org> | 2017-01-11 19:33:38 +0000 |
| commit | 3a03742c37bf6d38d2b37400a327bb6a0df82517 | |
| tree | a22a4763261be445a3dba5257f4d0b0888dad849 /llvm/lib/Target | |
| parent | f69e64662bda7498fd29418a54b4c5b8fb1e3476 | |
[ARM] More aggressive matching for vpadd and vpaddl.
The new matchers run after legalization, which keeps them simpler and avoids
blocking other optimizations.
Differential Revision: https://reviews.llvm.org/D27779
llvm-svn: 291693
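As a rough illustration (not part of the commit; the function below is hypothetical, and whether it vectorizes this way depends on the cost model and on NEON being available), the non-widening case is a plain pairwise add. Its vectorized form is an add of the two halves of a de-interleaving shuffle, i.e. ADD(VUZP.0, VUZP.1), which the new matcher can select as a single vpadd:

```cpp
#include <stdint.h>

// Hypothetical sketch: sums adjacent pairs of 16-bit elements. A vectorized
// form of this loop is an add of the even-lane and odd-lane halves of a
// de-interleaving shuffle, ADD(VUZP.0, VUZP.1), which the new
// AddCombineToVPADD matcher can select as a single NEON vpadd after
// legalization.
void pairwise_add(const int16_t *a, int16_t *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = a[2 * i] + a[2 * i + 1];
}
```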
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 108 |
1 file changed, 104 insertions, 4 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 441551595eb..9fefb977775 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9227,12 +9227,102 @@ SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
   return SDValue();
 }
 
-// AddCombineToVPADDL- For pair-wise add on neon, use the vpaddl instruction
-// (only after legalization).
-static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+static bool IsVUZPShuffleNode(SDNode *N) {
+  // VUZP shuffle node.
+  if (N->getOpcode() == ARMISD::VUZP)
+    return true;
+
+  // "VUZP" on i32 is an alias for VTRN.
+  if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
+    return true;
+
+  return false;
+}
+
+static SDValue AddCombineToVPADD(SDNode *N, SDValue N0, SDValue N1,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const ARMSubtarget *Subtarget) {
+  // Look for ADD(VUZP.0, VUZP.1).
+  if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
+      N0 == N1)
+    return SDValue();
+
+  // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
+  if (!N->getValueType(0).is64BitVector())
+    return SDValue();
+
+  // Generate vpadd.
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc dl(N);
+  SDNode *Unzip = N0.getNode();
+  EVT VT = N->getValueType(0);
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
+                                TLI.getPointerTy(DAG.getDataLayout())));
+  Ops.push_back(Unzip->getOperand(0));
+  Ops.push_back(Unzip->getOperand(1));
+
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+static SDValue AddCombineVUZPToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const ARMSubtarget *Subtarget) {
+  // Check for two extended operands.
+  if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
+        N1.getOpcode() == ISD::SIGN_EXTEND) &&
+      !(N0.getOpcode() == ISD::ZERO_EXTEND &&
+        N1.getOpcode() == ISD::ZERO_EXTEND))
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+  SDValue N10 = N1.getOperand(0);
+
+  // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
+  if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
+      N00 == N10)
+    return SDValue();
+
+  // We only recognize Q register paddl here; this can't be reached until
+  // after type legalization.
+  if (!N00.getValueType().is64BitVector() ||
+      !N0.getValueType().is128BitVector())
+    return SDValue();
+
+  // Generate vpaddl.
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+
+  SmallVector<SDValue, 8> Ops;
+  // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
+  unsigned Opcode;
+  if (N0.getOpcode() == ISD::SIGN_EXTEND)
+    Opcode = Intrinsic::arm_neon_vpaddls;
+  else
+    Opcode = Intrinsic::arm_neon_vpaddlu;
+  Ops.push_back(DAG.getConstant(Opcode, dl,
+                                TLI.getPointerTy(DAG.getDataLayout())));
+  EVT ElemTy = N00.getValueType().getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
+  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
+                               N00.getOperand(0), N00.getOperand(1));
+  Ops.push_back(Concat);
+
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
+}
+
+// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
+// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
+// much easier to match.
+static SDValue
+AddCombineBUILD_VECTORToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+                               TargetLowering::DAGCombinerInfo &DCI,
+                               const ARMSubtarget *Subtarget) {
   // Only perform optimization if after legalize, and if NEON is available. We
   // also expected both operands to be BUILD_VECTORs.
   if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
@@ -9288,6 +9378,10 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
       return SDValue();
   }
 
+  // Don't generate vpaddl+vmovn; we'll match it to vpadd later.
+  if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType())
+    return SDValue();
+
   // Create VPADDL node.
   SelectionDAG &DAG = DCI.DAG;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -9559,9 +9653,15 @@ static SDValue PerformADDCCombine(SDNode *N,
 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
                                              TargetLowering::DAGCombinerInfo &DCI,
                                              const ARMSubtarget *Subtarget){
+  // Attempt to create vpadd for this add.
+  if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
+    return Result;
+
   // Attempt to create vpaddl for this add.
-  if (SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget))
+  if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
+    return Result;
+  if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
+                                                      Subtarget))
     return Result;
 
   // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
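For the widening case handled by AddCombineVUZPToVPADDL, a rough source-level analogue (again hypothetical and not taken from the commit; the actual DAG shape depends on how the vectorizer and type legalizer lower the loop) is a pairwise add that widens its inputs:

```cpp
#include <stdint.h>

// Hypothetical sketch: a widening pairwise add from i8 pairs to i16 results.
// If the vectorizer emits a de-interleaving shuffle plus sign extensions,
// the legalized DAG can take the form ADD(SEXT(VUZP.0), SEXT(VUZP.1)) on a
// Q register, which the new combine selects as a single vpaddl.s8
// (vpaddl.u8 when the input is unsigned).
void pairwise_widen_add(const int8_t *a, int16_t *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = (int16_t)a[2 * i] + (int16_t)a[2 * i + 1];
}
```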

