diff options
author | Eli Friedman <efriedma@codeaurora.org> | 2016-12-14 20:25:26 +0000 |
---|---|---|
committer | Eli Friedman <efriedma@codeaurora.org> | 2016-12-14 20:25:26 +0000 |
commit | 10576e73c9a814d06cc86ff2b35ceae6dbc7af85 (patch) | |
tree | 01602f6b70c2c310f4021604a93273b5f70c1873 /llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | |
parent | 43c8b6b7b2277e1ae07ca86ee58859a5aab6acc1 (diff) | |
download | bcm5719-llvm-10576e73c9a814d06cc86ff2b35ceae6dbc7af85.tar.gz bcm5719-llvm-10576e73c9a814d06cc86ff2b35ceae6dbc7af85.zip |
[ARM] Add ARMISD::VLD1DUP to match vld1_dup more consistently.
Currently, there are substantial problems forming vld1_dup even if the
VDUP survives legalization. The lack of a dedicated ARMISD node for vld1_dup
leads to terrible results: not only can we not form post-increment vld1_dup
instructions, but we form scalar pre-increment and post-increment
loads which force the loaded value into a GPR. This patch fixes that
by combining the vdup+load into an ARMISD::VLD1DUP node before DAGCombine
messes it up.
Also includes a crash fix for vld2_dup (see testcase @vld2dupi8_postinc_variable).
Differential Revision: https://reviews.llvm.org/D27694
llvm-svn: 289703
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 74 |
1 files changed, 58 insertions, 16 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index e35ec3f8a3a..e257f2faf83 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -222,10 +222,11 @@ private: const uint16_t *QOpcodes); /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs - /// should be 2, 3 or 4. The opcode array specifies the instructions used + /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used /// for loading D registers. (Q registers are not supported.) void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes = nullptr); /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be @@ -1761,6 +1762,12 @@ static bool isVLDfixed(unsigned Opc) case ARM::VLD1q16wb_fixed : return true; case ARM::VLD1q32wb_fixed : return true; case ARM::VLD1q64wb_fixed : return true; + case ARM::VLD1DUPd8wb_fixed : return true; + case ARM::VLD1DUPd16wb_fixed : return true; + case ARM::VLD1DUPd32wb_fixed : return true; + case ARM::VLD1DUPq8wb_fixed : return true; + case ARM::VLD1DUPq16wb_fixed : return true; + case ARM::VLD1DUPq32wb_fixed : return true; case ARM::VLD2d8wb_fixed : return true; case ARM::VLD2d16wb_fixed : return true; case ARM::VLD2d32wb_fixed : return true; @@ -1815,6 +1822,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; + case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; + case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; + case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; + case ARM::VLD1DUPq8wb_fixed : return 
ARM::VLD1DUPq8wb_register; + case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; + case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; @@ -2255,8 +2268,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, } void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, - const uint16_t *Opcodes) { - assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); + const uint16_t *DOpcodes, + const uint16_t *QOpcodes) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); SDLoc dl(N); SDValue MemAddr, Align; @@ -2284,19 +2298,21 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, } Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); - unsigned OpcodeIndex; + unsigned Opc; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("unhandled vld-dup type"); - case MVT::v8i8: OpcodeIndex = 0; break; - case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v8i8: Opc = DOpcodes[0]; break; + case MVT::v16i8: Opc = QOpcodes[0]; break; + case MVT::v4i16: Opc = DOpcodes[1]; break; + case MVT::v8i16: Opc = QOpcodes[1]; break; case MVT::v2f32: - case MVT::v2i32: OpcodeIndex = 2; break; + case MVT::v2i32: Opc = DOpcodes[2]; break; + case MVT::v4f32: + case MVT::v4i32: Opc = QOpcodes[2]; break; } SDValue Pred = getAL(CurDAG, dl); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SDValue SuperReg; - unsigned Opc = Opcodes[OpcodeIndex]; SmallVector<SDValue, 6> Ops; Ops.push_back(MemAddr); Ops.push_back(Align); @@ -2304,6 +2320,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. 
SDValue Inc = N->getOperand(2); + if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode())) + Opc = getVLDSTRegisterUpdateOpcode(Opc); if (!isa<ConstantSDNode>(Inc.getNode())) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. @@ -2322,14 +2340,18 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, ResTys.push_back(MVT::Other); SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); - SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. - static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); - unsigned SubIdx = ARM::dsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + if (NumVecs == 1) { + ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); + } else { + SDValue SuperReg = SDValue(VLdDup, 0); + static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); + unsigned SubIdx = ARM::dsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + } ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); if (isUpdating) ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); @@ -3269,6 +3291,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case ARMISD::VLD1DUP: { + static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, + ARM::VLD1DUPd32 }; + static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, + ARM::VLD1DUPq32 }; + SelectVLDDup(N, false, 1, DOpcodes, QOpcodes); + return; + } + case ARMISD::VLD2DUP: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, ARM::VLD2DUPd32 }; @@ -3292,6 +3323,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; } + case ARMISD::VLD1DUP_UPD: { + static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, + 
ARM::VLD1DUPd16wb_fixed, + ARM::VLD1DUPd32wb_fixed }; + static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, + ARM::VLD1DUPq16wb_fixed, + ARM::VLD1DUPq32wb_fixed }; + SelectVLDDup(N, true, 1, DOpcodes, QOpcodes); + return; + } + case ARMISD::VLD2DUP_UPD: { static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, ARM::VLD2DUPd16wb_fixed, |