diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-30 21:15:10 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-03-30 21:15:10 +0000 |
| commit | a4b1b6ea05645c8baacd5770540cf8c57549632b (patch) | |
| tree | 7745c7dfa588a62ace9eb7556e5fd1003c046ea3 /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | |
| parent | 5d518386b658f51e18e77d31e2f823c171572b71 (diff) | |
| download | bcm5719-llvm-a4b1b6ea05645c8baacd5770540cf8c57549632b.tar.gz bcm5719-llvm-a4b1b6ea05645c8baacd5770540cf8c57549632b.zip | |
LegalizeDAG: Don't replace vector load with integer unless legal
On AMDGPU we want to be able to promote i64/f64 loads to v2i32.
If the access is unaligned, the legalizer would conclude that, since i64 is
legal, the load should be converted back to i64, resulting in an endless
legalization loop.
Extract the logic for scalarizing the load into a new TargetLowering
function, which can also replace the custom function AMDGPU
has for this purpose.
llvm-svn: 264927
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 42 |
1 files changed, 42 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4ce2dce5022..9ac6cc923b9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3108,6 +3108,48 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
   return true;
 }
 
+SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
+                                            SelectionDAG &DAG) const {
+  SDLoc SL(LD);
+  SDValue Chain = LD->getChain();
+  SDValue BasePTR = LD->getBasePtr();
+  EVT SrcVT = LD->getMemoryVT();
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  // Overview: emit one scalar (ext)load per element of LD, then rebuild the vector.
+  unsigned NumElem = SrcVT.getVectorNumElements();
+  // SrcEltVT: element type of the memory VT; DstEltVT: element type of LD's result.
+  EVT SrcEltVT = SrcVT.getScalarType();
+  EVT DstEltVT = LD->getValueType(0).getScalarType();
+  // Stride is the element size in bytes; the assert checks the element is byte-sized.
+  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
+  assert(SrcEltVT.isByteSized());
+
+  EVT PtrVT = BasePTR.getValueType();
+  // Vals collects result 0 and LoadChains result 1 of each scalar load.
+  SmallVector<SDValue, 8> Vals;
+  SmallVector<SDValue, 8> LoadChains;
+  // Each load uses the original Chain and LD's flags/AA info, at offset Idx * Stride.
+  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+    SDValue ScalarLoad = DAG.getExtLoad(
+      ExtType, SL, DstEltVT,
+      Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride),
+      SrcEltVT,
+      LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(),
+      MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo());
+    // Advance the address by one element for the next iteration.
+    BasePTR = DAG.getNode(ISD::ADD, SL, PtrVT, BasePTR,
+                          DAG.getConstant(Stride, SL, PtrVT));
+
+    Vals.push_back(ScalarLoad.getValue(0));
+    LoadChains.push_back(ScalarLoad.getValue(1));
+  }
+  // Join the per-element chains with a TokenFactor; pack the values with BUILD_VECTOR.
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
+  SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
+  // Hand back the rebuilt vector and the new chain together via getMergeValues.
+  return DAG.getMergeValues({ Value, NewChain }, SL);
+}
+
 //===----------------------------------------------------------------------===//
 // Implementation of Emulated TLS Model
 //===----------------------------------------------------------------------===// |

