diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-19 16:24:55 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-03-19 16:24:55 +0000 |
commit | a56f2822d0d3ff3ac1d289a8b7834d7f90415f6d (patch) | |
tree | 748cca62b711d124248968e5ba7fdc72fff40864 /llvm/lib/Target | |
parent | c27df85299546b99e1d5892b0a53cef85c48958f (diff) | |
download | bcm5719-llvm-a56f2822d0d3ff3ac1d289a8b7834d7f90415f6d.tar.gz bcm5719-llvm-a56f2822d0d3ff3ac1d289a8b7834d7f90415f6d.zip |
[SelectionDAG] Handle unary SelectPatternFlavor for ABS case in SelectionDAGBuilder::visitSelect
These changes are related to PR37743 and include:
SelectionDAGBuilder::visitSelect handles the unary SelectPatternFlavor::SPF_ABS case to build ABS node.
Delete the redundant recognizer of the integer ABS pattern from the DAGCombiner.
Add promoting the integer ABS node in the LegalizeIntegerType.
Expand-based legalization of integer result for the ABS nodes.
Expand-based legalization of ABS vector operations.
Add some integer abs testcases for different typesizes for Thumb arch
Add the custom ABS expanding and change the SAD pattern recognizer for X86 arch: The i64 result of the ABS is expanded to:
tmp = (SRA, Hi, 31)
Lo = (UADDO tmp, Lo)
Hi = (XOR tmp, (ADDCARRY tmp, hi, Lo:1))
Lo = (XOR tmp, Lo)
The "detectZextAbsDiff" function is changed for the recognition of pattern with the ABS node. Given a ABS node, detect the following pattern:
(ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))).
Change integer abs testcases for codegen with the ABS node support for AArch64.
Indicate that the ABS is legal for the i64 type when the NEON is supported.
Change the integer abs testcases to show changing of codegen.
Add combine and legalization of ABS nodes for Thumb arch.
Extend 'matchSelectPattern' to recognize the ABS patterns with ICMP_SGE condition.
For discussion, see https://bugs.llvm.org/show_bug.cgi?id=37743
Patch by: @ikulagin (Ivan Kulagin)
Differential Revision: https://reviews.llvm.org/D49837
llvm-svn: 356468
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 54 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 104 |
4 files changed, 97 insertions, 66 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 9748e204a5d..c1482822bc6 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1207,6 +1207,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget->getPrefLoopAlignment()); setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); + + if (Subtarget->isThumb() || Subtarget->isThumb2()) + setTargetDAGCombine(ISD::ABS); } bool ARMTargetLowering::useSoftFloat() const { @@ -8180,6 +8183,10 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::INTRINSIC_WO_CHAIN: return ReplaceLongIntrinsic(N, Results, DAG); + case ISD::ABS: + lowerABS(N, Results, DAG); + return ; + } if (Res.getNode()) Results.push_back(Res); @@ -10377,6 +10384,19 @@ static SDValue PerformAddeSubeCombine(SDNode *N, return SDValue(); } +static SDValue PerformABSCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + SDValue res; + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (!TLI.expandABS(N, res, DAG)) + return SDValue(); + + return res; +} + /// PerformADDECombine - Target-specific dag combine transform from /// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or /// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL @@ -12884,6 +12904,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; + case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget); case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget); case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget); case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); @@ -14395,6 +14416,39 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDLoc(Op)).first; } +void ARMTargetLowering::lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + EVT VT = N->getValueType(0); + assert(VT == MVT::i64 && "Unexpected type (!= i64) on ABS."); + MVT HalfT = MVT::i32; + SDLoc dl(N); + SDValue Hi, Lo, Tmp; + + if (!isOperationLegalOrCustom(ISD::ADDCARRY, HalfT) || + !isOperationLegalOrCustom(ISD::UADDO, HalfT)) + return ; + + unsigned OpTypeBits = HalfT.getScalarSizeInBits(); + SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); + + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(0, dl, HalfT)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(1, dl, HalfT)); + + Tmp = DAG.getNode(ISD::SRA, dl, HalfT, Hi, + DAG.getConstant(OpTypeBits - 1, dl, + getShiftAmountTy(HalfT, DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, + SDValue(Lo.getNode(), 1)); + Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); + Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); + + Results.push_back(Lo); + Results.push_back(Hi); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index cc5c31063e4..97283831913 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -698,6 +698,8 @@ class VectorType; SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + void lowerABS(SDNode *N, SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d10f8bfb093..2b1003663b6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -551,7 +551,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // add/sub are legal for all supported vector VT's. setOperationAction(ISD::ADD, VT, Legal); setOperationAction(ISD::SUB, VT, Legal); - setOperationAction(ISD::ABS, VT, Custom); + if (VT.getSizeInBits() == 128) + setOperationAction(ISD::ABS, VT, Custom); // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ba8848b63fd..04c58b482b5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -189,10 +189,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // Integer absolute. if (Subtarget.hasCMov()) { setOperationAction(ISD::ABS , MVT::i16 , Custom); - setOperationAction(ISD::ABS , MVT::i32 , Custom); - if (Subtarget.is64Bit()) - setOperationAction(ISD::ABS , MVT::i64 , Custom); + setOperationAction(ISD::ABS , MVT::i32 , Custom); } + setOperationAction(ISD::ABS , MVT::i64 , Custom); // Funnel shifts. for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { @@ -26790,6 +26789,31 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(Res); return; } + case ISD::ABS: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = N->getValueType(0); + assert(VT == MVT::i64 && "Unexpected type (!= i64) on ABS."); + MVT HalfT = MVT::i32; + SDValue Lo, Hi, Tmp; + SDVTList VTList = DAG.getVTList(HalfT, MVT::i1); + + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(0, dl, HalfT)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(0), + DAG.getConstant(1, dl, HalfT)); + Tmp = DAG.getNode( + ISD::SRA, dl, HalfT, Hi, + DAG.getConstant(HalfT.getSizeInBits() - 1, dl, + TLI.getShiftAmountTy(HalfT, DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::UADDO, dl, VTList, Tmp, Lo); + Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, Tmp, Hi, + SDValue(Lo.getNode(), 1)); + Hi = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Hi); + Lo = DAG.getNode(ISD::XOR, dl, HalfT, Tmp, Lo); + Results.push_back(Lo); + Results.push_back(Hi); + return; + } case ISD::SETCC: { // Widen v2i32 (setcc v2f32). This is really needed for AVX512VL when // setCC result type is v2i1 because type legalzation will end up with @@ -34010,66 +34034,16 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, return SDValue(); } -// Given a select, detect the following pattern: -// 1: %2 = zext <N x i8> %0 to <N x i32> -// 2: %3 = zext <N x i8> %1 to <N x i32> -// 3: %4 = sub nsw <N x i32> %2, %3 -// 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N] -// 5: %6 = sub nsw <N x i32> zeroinitializer, %4 -// 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6 +// Given a ABS node, detect the following pattern: +// (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))). // This is useful as it is the input into a SAD pattern. -static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0, - SDValue &Op1) { - // Check the condition of the select instruction is greater-than. - SDValue SetCC = Select->getOperand(0); - if (SetCC.getOpcode() != ISD::SETCC) - return false; - ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); - if (CC != ISD::SETGT && CC != ISD::SETLT) - return false; - - SDValue SelectOp1 = Select->getOperand(1); - SDValue SelectOp2 = Select->getOperand(2); - - // The following instructions assume SelectOp1 is the subtraction operand - // and SelectOp2 is the negation operand. - // In the case of SETLT this is the other way around. - if (CC == ISD::SETLT) - std::swap(SelectOp1, SelectOp2); - - // The second operand of the select should be the negation of the first - // operand, which is implemented as 0 - SelectOp1. - if (!(SelectOp2.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) && - SelectOp2.getOperand(1) == SelectOp1)) - return false; - - // The first operand of SetCC is the first operand of the select, which is the - // difference between the two input vectors. - if (SetCC.getOperand(0) != SelectOp1) - return false; - - // In SetLT case, The second operand of the comparison can be either 1 or 0. - APInt SplatVal; - if ((CC == ISD::SETLT) && - !((ISD::isConstantSplatVector(SetCC.getOperand(1).getNode(), SplatVal) && - SplatVal.isOneValue()) || - (ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode())))) - return false; - - // In SetGT case, The second operand of the comparison can be either -1 or 0. - if ((CC == ISD::SETGT) && - !(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) || - ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode()))) - return false; - - // The first operand of the select is the difference between the two input - // vectors. - if (SelectOp1.getOpcode() != ISD::SUB) +static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) { + SDValue AbsOp1 = Abs->getOperand(0); + if (AbsOp1.getOpcode() != ISD::SUB) return false; - Op0 = SelectOp1.getOperand(0); - Op1 = SelectOp1.getOperand(1); + Op0 = AbsOp1.getOperand(0); + Op1 = AbsOp1.getOperand(1); // Check if the operands of the sub are zero-extended from vectors of i8. if (Op0.getOpcode() != ISD::ZERO_EXTEND || @@ -34305,7 +34279,7 @@ static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, // If there was a match, we want Root to be a select that is the root of an // abs-diff pattern. - if (!Root || (Root.getOpcode() != ISD::VSELECT)) + if (!Root || Root.getOpcode() != ISD::ABS) return SDValue(); // Check whether we have an abs-diff pattern feeding into the select. @@ -41644,10 +41618,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, return SDValue(); // We know N is a reduction add, which means one of its operands is a phi. - // To match SAD, we need the other operand to be a vector select. - if (Op0.getOpcode() != ISD::VSELECT) + // To match SAD, we need the other operand to be a ABS. + if (Op0.getOpcode() != ISD::ABS) std::swap(Op0, Op1); - if (Op0.getOpcode() != ISD::VSELECT) + if (Op0.getOpcode() != ISD::ABS) return SDValue(); auto BuildPSADBW = [&](SDValue Op0, SDValue Op1) { @@ -41686,7 +41660,7 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG, Op0 = BuildPSADBW(SadOp0, SadOp1); // It's possible we have a sad on the other side too. - if (Op1.getOpcode() == ISD::VSELECT && + if (Op1.getOpcode() == ISD::ABS && detectZextAbsDiff(Op1, SadOp0, SadOp1)) { Op1 = BuildPSADBW(SadOp0, SadOp1); } |