diff options
| author | Hao Liu <Hao.Liu@arm.com> | 2015-06-26 02:32:07 +0000 |
|---|---|---|
| committer | Hao Liu <Hao.Liu@arm.com> | 2015-06-26 02:32:07 +0000 |
| commit | 7ec8ee311942d45b2362cfbd1da322cd38cb8a48 (patch) | |
| tree | f37f3fe099fe3778bcf30d2667901e759c2f32fe /llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | |
| parent | 25c910137abe33d02bf0d7b5c7a455eb2e4f3d2d (diff) | |
| download | bcm5719-llvm-7ec8ee311942d45b2362cfbd1da322cd38cb8a48.tar.gz bcm5719-llvm-7ec8ee311942d45b2362cfbd1da322cd38cb8a48.zip | |
[AArch64] Lower interleaved memory accesses to ldN/stN intrinsics. This patch also adds a function to calculate the cost of interleaved memory accesses.
E.g. Lower an interleaved load:
%wide.vec = load <8 x i32>, <8 x i32>* %ptr
%v0 = shuffle %wide.vec, undef, <0, 2, 4, 6>
%v1 = shuffle %wide.vec, undef, <1, 3, 5, 7>
into:
%ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
%vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
%vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
E.g. Lower an interleaved store:
%i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
store <12 x i32> %i.vec, <12 x i32>* %ptr
into:
%sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
%sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
%sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
Differential Revision: http://reviews.llvm.org/D10533
llvm-svn: 240754
Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index ed27cf84bbb..fc91c94351c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -407,6 +407,26 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } +unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( + unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, + unsigned Alignment, unsigned AddressSpace) { + assert(Factor >= 2 && "Invalid interleave factor"); + assert(isa<VectorType>(VecTy) && "Expect a vector type"); + + if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { + unsigned NumElts = VecTy->getVectorNumElements(); + Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); + unsigned SubVecSize = TLI->getDataLayout()->getTypeAllocSize(SubVecTy); + + // ldN/stN only support legal vector types of size 64 or 128 in bits. + if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) + return Factor; + } + + return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); +} + unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { unsigned Cost = 0; for (auto *I : Tys) { |

