diff options
| author | Sam Parker <sam.parker@arm.com> | 2019-07-01 08:21:28 +0000 |
|---|---|---|
| committer | Sam Parker <sam.parker@arm.com> | 2019-07-01 08:21:28 +0000 |
| commit | 98722691b0b5e375fdd02c9a464476752a3c598e (patch) | |
| tree | 749462ab6f4a19ba537c95094b76cf4cb9ebe44e /llvm/lib/Target/ARM/ARMISelLowering.cpp | |
| parent | 0384a780549a0b87cefc3e7d787787a5e5bb9527 (diff) | |
| download | bcm5719-llvm-98722691b0b5e375fdd02c9a464476752a3c598e.tar.gz bcm5719-llvm-98722691b0b5e375fdd02c9a464476752a3c598e.zip | |
[ARM] WLS/LE Code Generation
Backend changes to enable WLS/LE low-overhead loops for armv8.1-m:
1) Use TTI to communicate to the HardwareLoop pass that we should try
to generate intrinsics that guard the loop entry, as well as setting
the loop trip count.
2) Lower the BRCOND that uses said intrinsic to an Arm-specific node:
ARMISD::WLS.
3) Select the node in ISelDAGToDAG to a new pseudo instruction:
t2WhileLoopStart.
4) Add support in ArmLowOverheadLoops to handle the new pseudo
instruction.
Differential Revision: https://reviews.llvm.org/D63816
llvm-svn: 364733
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 42 |
1 file changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index d2ef680524a..f2b6af1f1fd 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -633,6 +633,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasMVEIntegerOps()) addMVEVectorTypes(Subtarget->hasMVEFloatOps()); + // Combine low-overhead loop intrinsics so that we can lower i1 types. + if (Subtarget->hasLOB()) + setTargetDAGCombine(ISD::BRCOND); + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -1542,6 +1546,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + case ARMISD::WLS: return "ARMISD::WLS"; } return nullptr; } @@ -12883,6 +12888,42 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D return V; } +static SDValue PerformHWLoopCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + // Look for (brcond (xor test.set.loop.iterations, -1) + SDValue CC = N->getOperand(1); + + if (CC->getOpcode() != ISD::XOR && CC->getOpcode() != ISD::SETCC) + return SDValue(); + + if (CC->getOperand(0)->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return SDValue(); + + SDValue Int = CC->getOperand(0); + unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue(); + if (IntOp != Intrinsic::test_set_loop_iterations) + return SDValue(); + + if (auto *Const = dyn_cast<ConstantSDNode>(CC->getOperand(1))) + assert(Const->isOne() && "Expected to compare against 1"); + else + assert(Const->isOne() && "Expected to compare against 1"); + + SDLoc dl(Int); + SDValue Chain = N->getOperand(0); + SDValue Elements = Int.getOperand(2); + SDValue ExitBlock = N->getOperand(2); + + // TODO: Once we start supporting tail 
predication, we can add another + // operand to WLS for the number of elements processed in a vector loop. + + SDValue Ops[] = { Chain, Elements, ExitBlock }; + SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops); + DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0)); + return Res; +} + /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { @@ -13114,6 +13155,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::OR: return PerformORCombine(N, DCI, Subtarget); case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); + case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget); case ARMISD::ADDC: case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget); |

