diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 6fe5e593314..59acc34906e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -232,6 +232,14 @@ private: void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); + /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs + /// should be 2 or 4. The opcode array specifies the instructions + /// used for 8, 16 and 32-bit lane sizes respectively, and each + /// pointer points to a set of NumVecs sub-opcodes used for the + /// different stages (e.g. VLD20 versus VLD21) of each load family. + void SelectMVE_VLD(SDNode *N, unsigned NumVecs, + const uint16_t *const *Opcodes); + /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used /// for loading D registers. @@ -2449,6 +2457,47 @@ void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); } +void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, + const uint16_t *const *Opcodes) { + EVT VT = N->getValueType(0); + SDLoc Loc(N); + + const uint16_t *OurOpcodes; + switch (VT.getVectorElementType().getSizeInBits()) { + case 8: + OurOpcodes = Opcodes[0]; + break; + case 16: + OurOpcodes = Opcodes[1]; + break; + case 32: + OurOpcodes = Opcodes[2]; + break; + default: + llvm_unreachable("bad vector element size in SelectMVE_VLD"); + } + + EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); + EVT ResultTys[] = {DataTy, MVT::Other}; + + auto Data = SDValue( + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); + SDValue Chain = N->getOperand(0); + for (unsigned Stage = 0; Stage < NumVecs; ++Stage) { + SDValue Ops[] = {Data, N->getOperand(2), Chain}; + auto LoadInst = + CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); + Data = SDValue(LoadInst, 0); + Chain = SDValue(LoadInst, 1); + } + + for (unsigned i = 0; i < NumVecs; i++) + ReplaceUses(SDValue(N, i), + CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, Data)); + ReplaceUses(SDValue(N, NumVecs), Chain); + CurDAG->RemoveDeadNode(N); +} + void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, @@ -4182,6 +4231,31 @@ void ARMDAGToDAGISel::Select(SDNode *N) { IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); return; } + + case Intrinsic::arm_mve_vld2q: { + static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; + static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, + ARM::MVE_VLD21_16}; + static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, + ARM::MVE_VLD21_32}; + static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; + SelectMVE_VLD(N, 2, Opcodes); + return; + } + + case Intrinsic::arm_mve_vld4q: { + static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, + ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; + static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, + ARM::MVE_VLD42_16, + ARM::MVE_VLD43_16}; + static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, + ARM::MVE_VLD42_32, + ARM::MVE_VLD43_32}; + static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; + SelectMVE_VLD(N, 4, Opcodes); + return; + } } break; } |

