diff options
author | Eli Friedman <efriedma@codeaurora.org> | 2016-12-16 18:44:08 +0000 |
---|---|---|
committer | Eli Friedman <efriedma@codeaurora.org> | 2016-12-16 18:44:08 +0000 |
commit | f624ec27b75701f1227eb03a44f3da84920c798d (patch) | |
tree | d10b422365e2b461b34e1a53f1f29a72cb5b5b00 /llvm/lib/Target/ARM/ARMISelLowering.cpp | |
parent | 79b4f0ad9cc5b019cf71bac388f1da1de4dd4e34 (diff) | |
download | bcm5719-llvm-f624ec27b75701f1227eb03a44f3da84920c798d.tar.gz bcm5719-llvm-f624ec27b75701f1227eb03a44f3da84920c798d.zip |
[ARM] Add ARMISD::VLD1DUP to match vld1_dup more consistently.
Currently, there are substantial problems forming vld1_dup even if the
VDUP survives legalization. The lack of an actual node
leads to terrible results: not only can we not form post-increment vld1_dup
instructions, but we form scalar pre-increment and post-increment
loads which force the loaded value into a GPR. This patch fixes that
by combining the vdup+load into an ARMISD node before DAGCombine
messes it up.
Also includes a crash fix for vld2_dup (see testcase @vld2dupi8_postinc_variable).
Recommiting with fix to avoid forming vld1dup if the type of the load
doesn't match the type of the vdup (see
https://llvm.org/bugs/show_bug.cgi?id=31404).
Differential Revision: https://reviews.llvm.org/D27694
llvm-svn: 289972
Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 30a27f3390a..2c88c75d886 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1428,6 +1428,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; + case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -1438,6 +1439,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; + case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD"; case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; @@ -10473,6 +10475,7 @@ static SDValue CombineBaseUpdate(SDNode *N, isLaneOp = true; switch (N->getOpcode()) { default: llvm_unreachable("unexpected opcode for Neon base update"); + case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break; case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; @@ -10587,8 +10590,8 @@ static SDValue CombineBaseUpdate(SDNode *N, StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal); } - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, - Ops, AlignedVecTy, + EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy; + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT, MemN->getMemOperand()); // Update the uses. @@ -10733,6 +10736,31 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } +/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP. +static SDValue PerformVDUPCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + + // Match VDUP(LOAD) -> VLD1DUP. + // We match this pattern here rather than waiting for isel because the + // transform is only legal for unindexed loads. + LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()); + if (LD && Op.hasOneUse() && LD->isUnindexed() && + LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) { + SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1), + DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) }; + SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, + Ops, LD->getMemoryVT(), + LD->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1)); + return VLDDup; + } + + return SDValue(); +} + static SDValue PerformLOADCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { EVT VT = N->getValueType(0); @@ -11560,6 +11588,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); + case ARMISD::VDUP: return PerformVDUPCombine(N, DCI); case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI.DAG, Subtarget); @@ -11575,6 +11604,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); + case ARMISD::VLD1DUP: case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: |