summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSam Tebbs <sam.tebbs@arm.com>2019-06-28 15:43:31 +0000
committerSam Tebbs <sam.tebbs@arm.com>2019-06-28 15:43:31 +0000
commite39e958da36da52d34e883dd5820262e96a8781a (patch)
tree8531832a979744c37c7b8efe661600430bc5b2b9 /llvm/lib
parent176b9f651685c52bce25e700a758bd33e6a5354d (diff)
downloadbcm5719-llvm-e39e958da36da52d34e883dd5820262e96a8781a.tar.gz
bcm5719-llvm-e39e958da36da52d34e883dd5820262e96a8781a.zip
[ARM] Add support for the MVE long shift instructions
MVE adds the lsll, lsrl and asrl instructions, which perform a shift on a 64-bit value held in two 32-bit registers. The Expand64BitShift function is modified to accept ISD::SHL, ISD::SRL and ISD::SRA and convert each into the appropriate ARMISD opcode. An SHL is converted into an lsll; an SRL is converted into an lsrl for the immediate form, or into a negation of the shift amount followed by an lsll for the register form; and an SRA is converted into an asrl. test/CodeGen/ARM/shift_parts.ll is added to test the logic of emitting these instructions. Differential Revision: https://reviews.llvm.org/D63430 llvm-svn: 364654
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp57
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h4
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td11
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td20
4 files changed, 85 insertions, 7 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index ae7e2b6e34d..d2ef680524a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -932,6 +932,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, MVT::i64, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+ // MVE lowers 64-bit shifts to lsll, lsrl and asrl. Only SHL is marked here;
+ // this assumes ISD::SRL and ISD::SRA of i64 are already marked Custom.
+ if (Subtarget->hasMVEIntegerOps())
+ setOperationAction(ISD::SHL, MVT::i64, Custom);
+
// Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
if (Subtarget->isThumb1Only()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
@@ -1411,6 +1416,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SSAT: return "ARMISD::SSAT";
case ARMISD::USAT: return "ARMISD::USAT";
+ case ARMISD::ASRL: return "ARMISD::ASRL";
+ case ARMISD::LSRL: return "ARMISD::LSRL";
+ case ARMISD::LSLL: return "ARMISD::LSLL";
+
case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
@@ -5619,11 +5628,54 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
if (VT != MVT::i64)
return SDValue();
- assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+ assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
+ N->getOpcode() == ISD::SHL) &&
"Unknown shift to lower!");
+ unsigned ShOpc = N->getOpcode();
+ if (ST->hasMVEIntegerOps()) {
+ SDValue ShAmt = N->getOperand(1);
+ unsigned ShPartsOpc = ARMISD::LSLL;
+ ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
+
+ // If the shift amount is greater than 32 then do the default optimisation
+ if (Con && Con->getZExtValue() > 32)
+ return SDValue();
+
+ // Extract the lower 32 bits of the shift amount if it's an i64
+ if (ShAmt->getValueType(0) == MVT::i64)
+ ShAmt = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, ShAmt,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ if (ShOpc == ISD::SRL) {
+ if (!Con)
+ // There is no t2LSRLr instruction so negate and perform an lsll if the
+ // shift amount is in a register, emulating a right shift.
+ ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(0, dl, MVT::i32), ShAmt);
+ else
+ // Else generate an lsrl on the immediate shift amount
+ ShPartsOpc = ARMISD::LSRL;
+ } else if (ShOpc == ISD::SRA)
+ ShPartsOpc = ARMISD::ASRL;
+
+ // Lower 32 bits of the destination/source
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(0, dl, MVT::i32));
+ // Upper 32 bits of the destination/source
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // Generate the shift operation as computed above
+ Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
+ ShAmt);
+ // The upper 32 bits come from the second result of the long shift node
+ Hi = SDValue(Lo.getNode(), 1);
+ return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+ }
+
// We only lower SRA, SRL of 1 here, all others use generic lowering.
- if (!isOneConstant(N->getOperand(1)))
+ if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
return SDValue();
// If we are in thumb mode, we don't have RRX.
@@ -8291,6 +8343,7 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
break;
case ISD::SRL:
case ISD::SRA:
+ case ISD::SHL:
Res = Expand64BitShift(N, DAG, Subtarget);
break;
case ISD::SREM:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 10a576caa67..1fb89c7eff4 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -76,6 +76,10 @@ class VectorType;
PIC_ADD, // Add with a PC operand and a PIC label.
+ ASRL, // MVE long arithmetic shift right.
+ LSRL, // MVE long shift right.
+ LSLL, // MVE long shift left.
+
CMP, // ARM compare instructions.
CMN, // ARM CMN instructions.
CMPZ, // ARM compare that sets only Z flag.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 8557bccbdbe..3799acd855e 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -99,6 +99,13 @@ def SDT_LongMac : SDTypeProfile<2, 4, [SDTCisVT<0, i32>,
SDTCisSameAs<0, 4>,
SDTCisSameAs<0, 5>]>;
+// ARMlsll, ARMlsrl, ARMasrl
+def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisInt<4>]>;
+
def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
@@ -171,6 +178,10 @@ def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
+def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
+def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
+def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
+
def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index c92802d6d97..a4221c5014e 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -427,11 +427,21 @@ class MVE_ScalarShiftDRegReg<string iname, bit op5, bit op16,
let DecoderMethod = "DecodeMVEOverlappingLongShift";
}
-def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0>;
-def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?>;
-def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0>;
-def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?>;
-def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?>;
+def MVE_ASRLr : MVE_ScalarShiftDRegReg<"asrl", 0b1, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMasrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_ASRLi : MVE_ScalarShiftDRegImm<"asrl", 0b10, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMasrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSLLr : MVE_ScalarShiftDRegReg<"lsll", 0b0, 0b0, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsll tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, rGPR:$Rm))]>;
+def MVE_LSLLi : MVE_ScalarShiftDRegImm<"lsll", 0b00, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsll tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
+def MVE_LSRL : MVE_ScalarShiftDRegImm<"lsrl", 0b01, ?, [(set tGPREven:$RdaLo, tGPROdd:$RdaHi,
+ (ARMlsrl tGPREven:$RdaLo_src,
+ tGPROdd:$RdaHi_src, (i32 imm:$imm)))]>;
def MVE_SQRSHRL : MVE_ScalarShiftDRegReg<"sqrshrl", 0b1, 0b1>;
def MVE_SQSHLL : MVE_ScalarShiftDRegImm<"sqshll", 0b11, 0b1>;
OpenPOWER on IntegriCloud