summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/ARM/ARMISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp113
1 files changed, 113 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ad95d988e9a..62953f4be18 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1337,6 +1337,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::UMAAL: return "ARMISD::UMAAL";
case ARMISD::UMLAL: return "ARMISD::UMLAL";
case ARMISD::SMLAL: return "ARMISD::SMLAL";
+ case ARMISD::SMULWB: return "ARMISD::SMULWB";
+ case ARMISD::SMULWT: return "ARMISD::SMULWT";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
@@ -1446,6 +1448,40 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Lowering Code
//===----------------------------------------------------------------------===//
+ /// Return true if \p Op is an ISD::SRL (logical shift right) node whose shift
+ /// amount, operand 1, is a ConstantSDNode with the value 16.
+ static bool isSRL16(const SDValue &Op) {
+   if (Op.getOpcode() == ISD::SRL) {
+     // A shift amount that is not a plain constant node never matches.
+     const auto *ShAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+     return ShAmt && ShAmt->getZExtValue() == 16;
+   }
+   return false;
+ }
+
+ /// Return true if \p Op is an ISD::SRA (arithmetic shift right) node whose
+ /// shift amount, operand 1, is a ConstantSDNode with the value 16.
+ static bool isSRA16(const SDValue &Op) {
+   if (Op.getOpcode() != ISD::SRA)
+     return false;
+
+   // Only a constant shift amount can be compared against 16.
+   const auto *Amount = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+   return Amount != nullptr && Amount->getZExtValue() == 16;
+ }
+
+ /// Return true if \p Op is an ISD::SHL (shift left) node whose shift amount,
+ /// operand 1, is a ConstantSDNode with the value 16.
+ static bool isSHL16(const SDValue &Op) {
+   if (Op.getOpcode() != ISD::SHL)
+     return false;
+
+   const auto *ShiftAmt = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+   if (!ShiftAmt)
+     return false; // Non-constant shift amount.
+   return ShiftAmt->getZExtValue() == 16;
+ }
+
+ /// Return true if \p Op is recognised as a sign-extended 16-bit value.
+ ///
+ /// An SRA by 16 is special-cased: it qualifies only when it shifts the result
+ /// of an SHL by 16, and the sign-bit count is not consulted for it. Handling
+ /// SRA here keeps things simple for callers that also look for SRAs which are
+ /// not sign extending a smaller value; without it, the order of their checks
+ /// would need extra care. Every other node qualifies only when
+ /// DAG.ComputeNumSignBits reports exactly 17 sign bits.
+ static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
+   if (!isSRA16(Op))
+     return DAG.ComputeNumSignBits(Op) == 17;
+
+   // (sra X, 16): a sign extension from 16 bits only if X is (shl Y, 16).
+   return isSHL16(Op.getOperand(0));
+ }
+
/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
switch (CC) {
@@ -9945,6 +9981,67 @@ static SDValue PerformANDCombine(SDNode *N,
return SDValue();
}
+ /// Try to fold an OR node into ARMISD::SMULWB or ARMISD::SMULWT.
+ ///
+ /// The pattern matched is
+ ///   (or (srl (smul_lohi A, B):0, 16), (shl (smul_lohi A, B):1, 16))
+ /// with the OR operands in either order, where both shifts read the two
+ /// results of the same SMUL_LOHI node and one multiplicand is either a
+ /// sign-extended 16-bit value (SMULWB) or an arithmetic shift right by 16
+ /// (SMULWT).
+ ///
+ /// \param OR        The ISD::OR node being combined.
+ /// \param DCI       Combiner state; its DAG is used to create the new node.
+ /// \param Subtarget Queried for the features the combine is gated on.
+ /// \returns SDValue(OR, 0) after all uses of the OR have been redirected to
+ ///          the new node, or an empty SDValue if nothing matched.
+ static SDValue PerformORCombineToSMULWBT(SDNode *OR,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const ARMSubtarget *Subtarget) {
+   // Gate on the subtarget: v6 ops are always required, and in Thumb mode both
+   // Thumb2 and DSP must be present.
+   if (!Subtarget->hasV6Ops())
+     return SDValue();
+   if (Subtarget->isThumb() &&
+       !(Subtarget->hasThumb2() && Subtarget->hasDSP()))
+     return SDValue();
+
+   // OR is commutative: use the operands as given when they are already
+   // (SRL, SHL), otherwise try them the other way round.
+   const bool Swapped = OR->getOperand(0).getOpcode() != ISD::SRL ||
+                        OR->getOperand(1).getOpcode() != ISD::SHL;
+   SDValue LowPart = OR->getOperand(Swapped ? 1 : 0);
+   SDValue HighPart = OR->getOperand(Swapped ? 0 : 1);
+   if (!isSRL16(LowPart) || !isSHL16(HighPart))
+     return SDValue();
+
+   // Both shifted values must come from one and the same smul_lohi node...
+   SDValue ShiftedLo = LowPart.getOperand(0);
+   SDValue ShiftedHi = HighPart.getOperand(0);
+   SDNode *Mul = ShiftedLo.getNode();
+   if (Mul != ShiftedHi.getNode() || ShiftedLo.getOpcode() != ISD::SMUL_LOHI)
+     return SDValue();
+
+   // ...with the SRL consuming result 0 and the SHL consuming result 1.
+   if (ShiftedLo != SDValue(Mul, 0) || ShiftedHi != SDValue(Mul, 1))
+     return SDValue();
+
+   // Now we have:
+   //   (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
+   // For SMULW[B|T] the smul_lohi takes one 32-bit and one 16-bit argument.
+   // For SMULWB the 16-bit value will be sign extended somehow.
+   // For SMULWT only the SRA is required.
+   // Either smul_lohi operand may be the 16-bit one: assume operand 0 first and
+   // fall back to operand 1 when operand 0 matches neither form.
+   SelectionDAG &DAG = DCI.DAG;
+   SDValue First = Mul->getOperand(0);
+   const bool HalfIsSecond = !isS16(First, DAG) && !isSRA16(First);
+   SDValue Half = Mul->getOperand(HalfIsSecond ? 1 : 0);
+   SDValue Full = Mul->getOperand(HalfIsSecond ? 0 : 1);
+
+   unsigned Opcode = 0;
+   if (isS16(Half, DAG)) {
+     Opcode = ARMISD::SMULWB;
+   } else if (isSRA16(Half)) {
+     Opcode = ARMISD::SMULWT;
+     // Drop the SRA and hand the unshifted value to the new node.
+     Half = Half->getOperand(0);
+   } else {
+     return SDValue();
+   }
+
+   SDLoc dl(OR);
+   SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, Full, Half);
+   DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
+   return SDValue(OR, 0);
+ }
+
/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -9982,6 +10079,8 @@ static SDValue PerformORCombine(SDNode *N,
// fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
return Result;
+ if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
+ return Result;
}
// The code below optimizes (or (and X, Y), Z).
@@ -11781,6 +11880,20 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVLDCombine(N, DCI);
case ARMISD::BUILD_VECTOR:
return PerformARMBUILD_VECTORCombine(N, DCI);
+ case ARMISD::SMULWB: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
+ case ARMISD::SMULWT: {
+ unsigned BitWidth = N->getValueType(0).getSizeInBits();
+ APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
+ if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
+ return SDValue();
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
OpenPOWER on IntegriCloud