diff options
| author | Sam Parker <sam.parker@arm.com> | 2019-07-01 08:21:28 +0000 |
|---|---|---|
| committer | Sam Parker <sam.parker@arm.com> | 2019-07-01 08:21:28 +0000 |
| commit | 98722691b0b5e375fdd02c9a464476752a3c598e (patch) | |
| tree | 749462ab6f4a19ba537c95094b76cf4cb9ebe44e /llvm/lib/Target/ARM/ARMISelLowering.cpp | |
| parent | 0384a780549a0b87cefc3e7d787787a5e5bb9527 (diff) | |
| download | bcm5719-llvm-98722691b0b5e375fdd02c9a464476752a3c598e.tar.gz bcm5719-llvm-98722691b0b5e375fdd02c9a464476752a3c598e.zip | |
[ARM] WLS/LE Code Generation
Backend changes to enable WLS/LE low-overhead loops for armv8.1-m:
1) Use TTI to communicate to the HardwareLoop pass that we should try
to generate intrinsics that guard the loop entry, as well as setting
the loop trip count.
2) Lower the BRCOND that uses said intrinsic to an Arm-specific node:
ARMISD::WLS.
3) Select the node in ISelDAGToDAG to a new pseudo instruction:
t2WhileLoopStart.
4) Add support in ArmLowOverheadLoops to handle the new pseudo
instruction.
Differential Revision: https://reviews.llvm.org/D63816
llvm-svn: 364733
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 42 |
1 file changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d2ef680524a..f2b6af1f1fd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -633,6 +633,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasMVEIntegerOps()) addMVEVectorTypes(Subtarget->hasMVEFloatOps()); + // Combine low-overhead loop intrinsics so that we can lower i1 types. + if (Subtarget->hasLOB()) + setTargetDAGCombine(ISD::BRCOND); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -1542,6 +1546,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + case ARMISD::WLS: return "ARMISD::WLS"; } return nullptr; } @@ -12883,6 +12888,42 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D return V; } +static SDValue PerformHWLoopCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + // Look for (brcond (xor test.set.loop.iterations, -1) + SDValue CC = N->getOperand(1); + + if (CC->getOpcode() != ISD::XOR && CC->getOpcode() != ISD::SETCC) + return SDValue(); + + if (CC->getOperand(0)->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return SDValue(); + + SDValue Int = CC->getOperand(0); + unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue(); + if (IntOp != Intrinsic::test_set_loop_iterations) + return SDValue(); + + if (auto *Const = dyn_cast<ConstantSDNode>(CC->getOperand(1))) + assert(Const->isOne() && "Expected to compare against 1"); + else + assert(Const->isOne() && "Expected to compare against 1"); + + SDLoc dl(Int); + SDValue Chain = N->getOperand(0); + SDValue Elements = Int.getOperand(2); + SDValue ExitBlock = N->getOperand(2); + + // TODO: Once we start supporting tail 
predication, we can add another + // operand to WLS for the number of elements processed in a vector loop. + + SDValue Ops[] = { Chain, Elements, ExitBlock }; + SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); + return Res; +} + /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { @@ -13114,6 +13155,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); |

