Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp     |  18
-rw-r--r--  llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp |  83
-rw-r--r--  llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp      | 200
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp      |   8
-rw-r--r--  llvm/lib/Target/ARM/ARMInstrNEON.td          |  23
5 files changed, 262 insertions(+), 70 deletions(-)
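(Editor's note, not part of the commit: a minimal usage sketch of what this patch enables. It assumes the ACLE intrinsic vld2q_dup_u8 from arm_neon.h is available for the target; that intrinsic loads two adjacent bytes and replicates each across a full 128-bit Q register, which Clang lowers to the llvm.arm.neon.vld2dup intrinsic whose Q-register selection is added below.)

    #include <arm_neon.h>

    /* Hypothetical example: loads p[0] and p[1] and splats each byte
       across all 16 lanes of a Q register.  On 32-bit ARM this goes
       through llvm.arm.neon.vld2dup, which this patch teaches the
       backend to select for 128-bit result types. */
    uint8x16x2_t broadcast_pair(const uint8_t *p) {
      return vld2q_dup_u8(p);
    }
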
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 4b589f5b447..b1c2031c7d7 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -4310,12 +4310,30 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLD2DUPd8wb_register:
case ARM::VLD2DUPd16wb_register:
case ARM::VLD2DUPd32wb_register:
+ case ARM::VLD2DUPq8EvenPseudo:
+ case ARM::VLD2DUPq8OddPseudo:
+ case ARM::VLD2DUPq16EvenPseudo:
+ case ARM::VLD2DUPq16OddPseudo:
+ case ARM::VLD2DUPq32EvenPseudo:
+ case ARM::VLD2DUPq32OddPseudo:
+ case ARM::VLD3DUPq8EvenPseudo:
+ case ARM::VLD3DUPq8OddPseudo:
+ case ARM::VLD3DUPq16EvenPseudo:
+ case ARM::VLD3DUPq16OddPseudo:
+ case ARM::VLD3DUPq32EvenPseudo:
+ case ARM::VLD3DUPq32OddPseudo:
case ARM::VLD4DUPd8Pseudo:
case ARM::VLD4DUPd16Pseudo:
case ARM::VLD4DUPd32Pseudo:
case ARM::VLD4DUPd8Pseudo_UPD:
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD4DUPq8EvenPseudo:
+ case ARM::VLD4DUPq8OddPseudo:
+ case ARM::VLD4DUPq16EvenPseudo:
+ case ARM::VLD4DUPq16OddPseudo:
+ case ARM::VLD4DUPq32EvenPseudo:
+ case ARM::VLD4DUPq32OddPseudo:
case ARM::VLD1LNq8Pseudo:
case ARM::VLD1LNq16Pseudo:
case ARM::VLD1LNq32Pseudo:
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index d82bef5b759..439ba9e2c78 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -186,6 +186,13 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
{ ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
+{ ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false},
+{ ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false},
+{ ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false},
+{ ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false},
+{ ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false},
+{ ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false},
+
{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
@@ -213,6 +220,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
+{ ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true},
+{ ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true},
+{ ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true},
+{ ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true},
+{ ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true},
+{ ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true},
{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
@@ -248,6 +261,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
+{ ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true},
+{ ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true},
+{ ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true},
+{ ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true},
+{ ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true},
+{ ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true},
{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
@@ -463,15 +482,31 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
bool DstIsDead = MI.getOperand(OpIdx).isDead();
unsigned DstReg = MI.getOperand(OpIdx++).getReg();
- unsigned D0, D1, D2, D3;
- GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
- MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 1 && TableEntry->copyAllListRegs)
- MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 2 && TableEntry->copyAllListRegs)
- MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
- if (NumRegs > 3 && TableEntry->copyAllListRegs)
- MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
+ TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
+ TableEntry->RealOpc == ARM::VLD2DUPd32x2) {
+ unsigned SubRegIndex;
+ if (RegSpc == EvenDblSpc) {
+ SubRegIndex = ARM::dsub_0;
+ } else {
+ assert(RegSpc == OddDblSpc && "Unexpected spacing!");
+ SubRegIndex = ARM::dsub_1;
+ }
+ unsigned SubReg = TRI->getSubReg(DstReg, SubRegIndex);
+ unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
+ &ARM::DPairSpcRegClass);
+ MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
+ } else {
+ unsigned D0, D1, D2, D3;
+ GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 1 && TableEntry->copyAllListRegs)
+ MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2 && TableEntry->copyAllListRegs)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3 && TableEntry->copyAllListRegs)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+ }
if (TableEntry->isUpdating)
MIB.add(MI.getOperand(OpIdx++));
@@ -510,10 +545,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
// has an extra operand that is a use of the super-register. Record the
// operand index and skip over it.
unsigned SrcOpIdx = 0;
- if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
- RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
- RegSpc == SingleHighTSpc)
- SrcOpIdx = OpIdx++;
+ if(TableEntry->RealOpc != ARM::VLD2DUPd8x2 &&
+ TableEntry->RealOpc != ARM::VLD2DUPd16x2 &&
+ TableEntry->RealOpc != ARM::VLD2DUPd32x2) {
+ if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
+ RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
+ RegSpc == SingleHighTSpc)
+ SrcOpIdx = OpIdx++;
+ }
// Copy the predicate operands.
MIB.add(MI.getOperand(OpIdx++));
@@ -1674,6 +1713,24 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
case ARM::VLD4DUPd8Pseudo_UPD:
case ARM::VLD4DUPd16Pseudo_UPD:
case ARM::VLD4DUPd32Pseudo_UPD:
+ case ARM::VLD2DUPq8EvenPseudo:
+ case ARM::VLD2DUPq8OddPseudo:
+ case ARM::VLD2DUPq16EvenPseudo:
+ case ARM::VLD2DUPq16OddPseudo:
+ case ARM::VLD2DUPq32EvenPseudo:
+ case ARM::VLD2DUPq32OddPseudo:
+ case ARM::VLD3DUPq8EvenPseudo:
+ case ARM::VLD3DUPq8OddPseudo:
+ case ARM::VLD3DUPq16EvenPseudo:
+ case ARM::VLD3DUPq16OddPseudo:
+ case ARM::VLD3DUPq32EvenPseudo:
+ case ARM::VLD3DUPq32OddPseudo:
+ case ARM::VLD4DUPq8EvenPseudo:
+ case ARM::VLD4DUPq8OddPseudo:
+ case ARM::VLD4DUPq16EvenPseudo:
+ case ARM::VLD4DUPq16OddPseudo:
+ case ARM::VLD4DUPq32EvenPseudo:
+ case ARM::VLD4DUPq32OddPseudo:
ExpandVLD(MBBI);
return true;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c3c44d71092..b5eb57cfb8e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -203,10 +203,11 @@ private:
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
- /// for loading D registers. (Q registers are not supported.)
- void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *DOpcodes,
- const uint16_t *QOpcodes = nullptr);
+ /// for loading D registers.
+ void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
+ unsigned NumVecs, const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes0 = nullptr,
+ const uint16_t *QOpcodes1 = nullptr);
/// Try to select SBFX/UBFX instructions for ARM.
bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -1747,7 +1748,9 @@ void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -1883,7 +1886,9 @@ void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -2033,7 +2038,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
SDLoc dl(N);
SDValue MemAddr, Align;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
+ bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
+ // nodes are not intrinsics.
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
@@ -2149,21 +2156,22 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
CurDAG->RemoveDeadNode(N);
}
-void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
+void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
+ bool isUpdating, unsigned NumVecs,
const uint16_t *DOpcodes,
- const uint16_t *QOpcodes) {
+ const uint16_t *QOpcodes0,
+ const uint16_t *QOpcodes1) {
assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
- if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
+ unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
+ if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
return;
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
-
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
+ bool is64BitVector = VT.is64BitVector();
unsigned Alignment = 0;
if (NumVecs != 3) {
@@ -2180,49 +2188,84 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
}
Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
- unsigned Opc;
+ unsigned OpcodeIndex;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld-dup type");
- case MVT::v8i8: Opc = DOpcodes[0]; break;
- case MVT::v16i8: Opc = QOpcodes[0]; break;
- case MVT::v4i16: Opc = DOpcodes[1]; break;
- case MVT::v8i16: Opc = QOpcodes[1]; break;
+ case MVT::v8i8:
+ case MVT::v16i8: OpcodeIndex = 0; break;
+ case MVT::v4i16:
+ case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v2f32:
- case MVT::v2i32: Opc = DOpcodes[2]; break;
+ case MVT::v2i32:
case MVT::v4f32:
- case MVT::v4i32: Opc = QOpcodes[2]; break;
- }
-
- SDValue Pred = getAL(CurDAG, dl);
- SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(MemAddr);
- Ops.push_back(Align);
- if (isUpdating) {
- // fixed-stride update instructions don't have an explicit writeback
- // operand. It's implicit in the opcode itself.
- SDValue Inc = N->getOperand(2);
- bool IsImmUpdate =
- isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
- if (NumVecs <= 2 && !IsImmUpdate)
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- if (!IsImmUpdate)
- Ops.push_back(Inc);
- // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
- else if (NumVecs > 2)
- Ops.push_back(Reg0);
+ case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v1f64:
+ case MVT::v1i64: OpcodeIndex = 3; break;
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0);
- Ops.push_back(Chain);
unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+
std::vector<EVT> ResTys;
- ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
+ ResTys.push_back(ResTy);
if (isUpdating)
ResTys.push_back(MVT::i32);
ResTys.push_back(MVT::Other);
- SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ SDValue Pred = getAL(CurDAG, dl);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+
+ SDNode *VLdDup;
+ if (is64BitVector || NumVecs == 1) {
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(MemAddr);
+ Ops.push_back(Align);
+ unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] :
+ QOpcodes0[OpcodeIndex];
+ if (isUpdating) {
+ // fixed-stride update instructions don't have an explicit writeback
+ // operand. It's implicit in the opcode itself.
+ SDValue Inc = N->getOperand(2);
+ bool IsImmUpdate =
+ isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
+ if (NumVecs <= 2 && !IsImmUpdate)
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
+ if (!IsImmUpdate)
+ Ops.push_back(Inc);
+ // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
+ else if (NumVecs > 2)
+ Ops.push_back(Reg0);
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0);
+ Ops.push_back(Chain);
+ VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ } else if (NumVecs == 2) {
+ const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
+ dl, ResTys, OpsA);
+
+ Chain = SDValue(VLdA, 1);
+ const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain };
+ VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
+ } else {
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex],
+ dl, ResTys, OpsA);
+
+ SDValue SuperReg = SDValue(VLdA, 0);
+ Chain = SDValue(VLdA, 1);
+ const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain };
+ VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB);
+ }
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
// Extract the subregisters.
@@ -2231,10 +2274,11 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
} else {
SDValue SuperReg = SDValue(VLdDup, 0);
static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
- unsigned SubIdx = ARM::dsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
ReplaceUses(SDValue(N, Vec),
CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ }
}
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
@@ -3066,14 +3110,14 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1DUPd32 };
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
ARM::VLD1DUPq32 };
- SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
return;
}
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
- SelectVLDDup(N, false, 2, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
return;
}
@@ -3081,7 +3125,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
ARM::VLD3DUPd16Pseudo,
ARM::VLD3DUPd32Pseudo };
- SelectVLDDup(N, false, 3, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
return;
}
@@ -3089,7 +3133,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
ARM::VLD4DUPd16Pseudo,
ARM::VLD4DUPd32Pseudo };
- SelectVLDDup(N, false, 4, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
return;
}
@@ -3100,7 +3144,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
ARM::VLD1DUPq16wb_fixed,
ARM::VLD1DUPq32wb_fixed };
- SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
return;
}
@@ -3108,7 +3152,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
ARM::VLD2DUPd32wb_fixed };
- SelectVLDDup(N, true, 2, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes);
return;
}
@@ -3116,7 +3160,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
ARM::VLD3DUPd16Pseudo_UPD,
ARM::VLD3DUPd32Pseudo_UPD };
- SelectVLDDup(N, true, 3, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes);
return;
}
@@ -3124,7 +3168,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
ARM::VLD4DUPd16Pseudo_UPD,
ARM::VLD4DUPd32Pseudo_UPD };
- SelectVLDDup(N, true, 4, Opcodes);
+ SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes);
return;
}
@@ -3531,6 +3575,52 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case Intrinsic::arm_neon_vld2dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
+ ARM::VLD2DUPd32, ARM::VLD1q64 };
+ static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
+ ARM::VLD2DUPq16EvenPseudo,
+ ARM::VLD2DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
+ ARM::VLD2DUPq16OddPseudo,
+ ARM::VLD2DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld3dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
+ ARM::VLD3DUPd16Pseudo,
+ ARM::VLD3DUPd32Pseudo,
+ ARM::VLD1d64TPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
+ ARM::VLD3DUPq16EvenPseudo,
+ ARM::VLD3DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
+ ARM::VLD3DUPq16OddPseudo,
+ ARM::VLD3DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
+ case Intrinsic::arm_neon_vld4dup: {
+ static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
+ ARM::VLD4DUPd16Pseudo,
+ ARM::VLD4DUPd32Pseudo,
+ ARM::VLD1d64QPseudo };
+ static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
+ ARM::VLD4DUPq16EvenPseudo,
+ ARM::VLD4DUPq32EvenPseudo };
+ static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
+ ARM::VLD4DUPq16OddPseudo,
+ ARM::VLD4DUPq32OddPseudo };
+ SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
+ DOpcodes, QOpcodes0, QOpcodes1);
+ return;
+ }
+
case Intrinsic::arm_neon_vld2lane: {
static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
ARM::VLD2LNd16Pseudo,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 673bc8dd47a..f3288cb0cfa 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12772,6 +12772,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vld2dup:
+ case Intrinsic::arm_neon_vld3dup:
+ case Intrinsic::arm_neon_vld4dup:
case Intrinsic::arm_neon_vst1:
case Intrinsic::arm_neon_vst1x2:
case Intrinsic::arm_neon_vst1x3:
@@ -14066,7 +14069,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::arm_neon_vld4:
case Intrinsic::arm_neon_vld2lane:
case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane: {
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vld2dup:
+ case Intrinsic::arm_neon_vld3dup:
+ case Intrinsic::arm_neon_vld4dup: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
// Conservatively set memVT to the entire set of vectors loaded.
auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 58fcf21dd19..4525eec8da0 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -209,7 +209,7 @@ def VecListDPairSpacedAllLanesAsmOperand : AsmOperandClass {
let ParserMethod = "parseVectorList";
let RenderMethod = "addVecListOperands";
}
-def VecListDPairSpacedAllLanes : RegisterOperand<DPair,
+def VecListDPairSpacedAllLanes : RegisterOperand<DPairSpc,
"printVectorListTwoSpacedAllLanes"> {
let ParserMatchClass = VecListDPairSpacedAllLanesAsmOperand;
}
@@ -1518,6 +1518,13 @@ def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
addrmode6dupalign64>;
+def VLD2DUPq8EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq8OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq16OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32EvenPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+def VLD2DUPq32OddPseudo : VLDQQPseudo<IIC_VLD2dup>, Sched<[WriteVLD2]>;
+
// ...with address register writeback:
multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
Operand AddrMode> {
@@ -1578,6 +1585,13 @@ def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">;
def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
+def VLD3DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+def VLD3DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD3dup>, Sched<[WriteVLD2]>;
+
// ...with address register writeback:
class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
@@ -1624,6 +1638,13 @@ def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">;
def VLD4DUPq16 : VLD4DUP<{0,1,1,?}, "16">;
def VLD4DUPq32 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+def VLD4DUPq8EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPq8OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPq16EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPq16OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPq32EvenPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+def VLD4DUPq32OddPseudo : VLDQQQQPseudo<IIC_VLD4dup>, Sched<[WriteVLD2]>;
+
// ...with address register writeback:
class VLD4DUPWB<bits<4> op7_4, string Dt>
: NLdSt<1, 0b10, 0b1111, op7_4,