summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp 25
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp 183
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.h 2
-rw-r--r-- llvm/lib/Target/ARM/ARMInstrInfo.td 10
4 files changed, 183 insertions, 37 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b349627b67b..d0cd56fd5a3 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2998,13 +2998,26 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
- case ARMISD::WLS: {
- SDValue Ops[] = { N->getOperand(1), // Loop count
- N->getOperand(2), // Exit target
+ case ARMISD::WLS:
+ case ARMISD::LE: {
+ SDValue Ops[] = { N->getOperand(1),
+ N->getOperand(2),
N->getOperand(0) };
- SDNode *LoopStart =
- CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, Ops);
- ReplaceUses(N, LoopStart);
+ unsigned Opc = N->getOpcode() == ARMISD::WLS ?
+ ARM::t2WhileLoopStart : ARM::t2LoopEnd;
+ SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
+ ReplaceUses(N, New);
+ CurDAG->RemoveDeadNode(N);
+ return;
+ }
+ case ARMISD::LOOP_DEC: {
+ SDValue Ops[] = { N->getOperand(1),
+ N->getOperand(2),
+ N->getOperand(0) };
+ SDNode *Dec =
+ CurDAG->getMachineNode(ARM::t2LoopDec, dl,
+ CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
+ ReplaceUses(N, Dec);
CurDAG->RemoveDeadNode(N);
return;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fe620e02bfa..222b5bca7a6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -669,8 +669,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
addMVEVectorTypes(Subtarget->hasMVEFloatOps());
// Combine low-overhead loop intrinsics so that we can lower i1 types.
- if (Subtarget->hasLOB())
+ if (Subtarget->hasLOB()) {
setTargetDAGCombine(ISD::BRCOND);
+ setTargetDAGCombine(ISD::BR_CC);
+ }
if (Subtarget->hasNEON()) {
addDRTypeForNEON(MVT::v2f32);
@@ -1589,6 +1591,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
case ARMISD::WLS: return "ARMISD::WLS";
+ case ARMISD::LE: return "ARMISD::LE";
+ case ARMISD::LOOP_DEC: return "ARMISD::LOOP_DEC";
}
return nullptr;
}
@@ -13034,43 +13038,169 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
return V;
}
+// Given N, the value controlling the conditional branch, search for the loop
+// intrinsic, returning it, along with how the value is used. We need to handle
+// patterns such as the following:
+// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
+// (brcond (setcc (loop.decrement), 0, eq), exit)
+// (brcond (setcc (loop.decrement), 0, ne), header)
+static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
+ bool &Negate) {
+ switch (N->getOpcode()) {
+ default:
+ break;
+ case ISD::XOR: {
+ if (!isa<ConstantSDNode>(N.getOperand(1)))
+ return SDValue();
+ if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
+ return SDValue();
+ Negate = !Negate;
+ return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
+ }
+ case ISD::SETCC: {
+ auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!Const)
+ return SDValue();
+ if (Const->isNullValue())
+ Imm = 0;
+ else if (Const->isOne())
+ Imm = 1;
+ else
+ return SDValue();
+ CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
+ return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntOp = cast<ConstantSDNode>(N.getOperand(1))->getZExtValue();
+ if (IntOp != Intrinsic::test_set_loop_iterations &&
+ IntOp != Intrinsic::loop_decrement_reg)
+ return SDValue();
+ return N;
+ }
+ }
+ return SDValue();
+}
+
static SDValue PerformHWLoopCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *ST) {
- // Look for (brcond (xor test.set.loop.iterations, -1)
- SDValue CC = N->getOperand(1);
- unsigned Opc = CC->getOpcode();
- SDValue Int;
- if ((Opc == ISD::XOR || Opc == ISD::SETCC) &&
- (CC->getOperand(0)->getOpcode() == ISD::INTRINSIC_W_CHAIN)) {
+ // The hwloop intrinsics that we're interested in are used for control-flow,
+ // either for entering or exiting the loop:
+ // - test.set.loop.iterations will test whether its operand is zero. If it
+ // is zero, the following branch should not enter the loop.
+ // - loop.decrement.reg also tests whether its operand is zero. If it is
+ // zero, the following branch should not branch back to the beginning of
+ // the loop.
+ // So here, we need to check how the brcond (or br_cc) is using the result of
+ // each of the intrinsics to ensure that we're branching to the right place
+ // at the right time.
+
+ ISD::CondCode CC;
+ SDValue Cond;
+ int Imm = 1;
+ bool Negate = false;
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest;
- assert((isa<ConstantSDNode>(CC->getOperand(1)) &&
- cast<ConstantSDNode>(CC->getOperand(1))->isOne()) &&
- "Expected to compare against 1");
+ if (N->getOpcode() == ISD::BRCOND) {
+ CC = ISD::SETEQ;
+ Cond = N->getOperand(1);
+ Dest = N->getOperand(2);
+ } else {
+ assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
+ CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ Cond = N->getOperand(2);
+ Dest = N->getOperand(4);
+ if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
+ if (!Const->isOne() && !Const->isNullValue())
+ return SDValue();
+ Imm = Const->getZExtValue();
+ } else
+ return SDValue();
+ }
- Int = CC->getOperand(0);
- } else if (CC->getOpcode() == ISD::INTRINSIC_W_CHAIN)
- Int = CC;
- else
+ SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
+ if (!Int)
return SDValue();
- unsigned IntOp = cast<ConstantSDNode>(Int.getOperand(1))->getZExtValue();
- if (IntOp != Intrinsic::test_set_loop_iterations)
- return SDValue();
+ if (Negate)
+ CC = ISD::getSetCCInverse(CC, true);
+
+ auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
+ return (CC == ISD::SETEQ && Imm == 0) ||
+ (CC == ISD::SETNE && Imm == 1) ||
+ (CC == ISD::SETLT && Imm == 1) ||
+ (CC == ISD::SETULT && Imm == 1);
+ };
+
+ auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
+ return (CC == ISD::SETEQ && Imm == 1) ||
+ (CC == ISD::SETNE && Imm == 0) ||
+ (CC == ISD::SETGT && Imm == 0) ||
+ (CC == ISD::SETUGT && Imm == 0) ||
+ (CC == ISD::SETGE && Imm == 1) ||
+ (CC == ISD::SETUGE && Imm == 1);
+ };
+
+ assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
+ "unsupported condition");
SDLoc dl(Int);
- SDValue Chain = N->getOperand(0);
+ SelectionDAG &DAG = DCI.DAG;
SDValue Elements = Int.getOperand(2);
- SDValue ExitBlock = N->getOperand(2);
+ unsigned IntOp = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
+ assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
+ && "expected single br user");
+ SDNode *Br = *N->use_begin();
+ SDValue OtherTarget = Br->getOperand(1);
+
+ // Update the unconditional branch to branch to the given Dest.
+ auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
+ SDValue NewBrOps[] = { Br->getOperand(0), Dest };
+ SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
+ };
- // TODO: Once we start supporting tail predication, we can add another
- // operand to WLS for the number of elements processed in a vector loop.
+ if (IntOp == Intrinsic::test_set_loop_iterations) {
+ SDValue Res;
+ // We expect this 'instruction' to branch when the counter is zero.
+ if (IsTrueIfZero(CC, Imm)) {
+ SDValue Ops[] = { Chain, Elements, Dest };
+ Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+ } else {
+ // The logic is the reverse of what we need for WLS, so find the other
+ // basic block target: the target of the following br.
+ UpdateUncondBr(Br, Dest, DAG);
- SDValue Ops[] = { Chain, Elements, ExitBlock };
- SDValue Res = DCI.DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
- DCI.DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
- return Res;
+ SDValue Ops[] = { Chain, Elements, OtherTarget };
+ Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
+ }
+ DAG.ReplaceAllUsesOfValueWith(Int.getValue(1), Int.getOperand(0));
+ return Res;
+ } else {
+ SDValue Size = DAG.getTargetConstant(
+ cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, MVT::i32);
+ SDValue Args[] = { Int.getOperand(0), Elements, Size, };
+ SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
+ DAG.getVTList(MVT::i32, MVT::Other), Args);
+ DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
+
+ // We expect this instruction to branch when the count is not zero.
+ SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
+
+ // Update the unconditional branch to target the original conditional
+ // destination (Dest) if we've found the condition has been reversed.
+ if (Target == OtherTarget)
+ UpdateUncondBr(Br, Dest, DAG);
+
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ SDValue(LoopDec.getNode(), 1), Chain);
+
+ SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
+ return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
+ }
+ return SDValue();
}
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
@@ -13304,7 +13434,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
- case ISD::BRCOND: return PerformHWLoopCombine(N, DCI, Subtarget);
+ case ISD::BRCOND:
+ case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
case ARMISD::ADDC:
case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index b14c6487209..84f2f7239fe 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -126,6 +126,8 @@ class VectorType;
WIN__DBZCHK, // Windows' divide by zero check
WLS, // Low-overhead loops, While Loop Start
+ LOOP_DEC, // Really a part of LE, performs the sub
+ LE, // Low-overhead loops, Loop End
VCEQ, // Vector compare equal.
VCEQZ, // Vector compare equal to zero.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index e3514546385..74f0c3dd964 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -108,8 +108,8 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
// TODO Add another operand for 'Size' so that we can re-use this node when we
// start supporting *TP versions.
-def SDT_ARMWhileLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, OtherVT>]>;
+def SDT_ARMLoLoop : SDTypeProfile<0, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, OtherVT>]>;
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
@@ -265,9 +265,9 @@ def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
-def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
- [SDNPHasChain]>;
-
+def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMLoLoop, [SDNPHasChain]>;
+def ARMLE : SDNode<"ARMISD::LE", SDT_ARMLoLoop, [SDNPHasChain]>;
+def ARMLoopDec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
OpenPOWER on IntegriCloud