diff options
| author | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-31 09:57:56 +0000 |
|---|---|---|
| committer | Dorit Nuzman <dorit.nuzman@intel.com> | 2018-10-31 09:57:56 +0000 |
| commit | 34da6dd696439e195e7b650d97a95913101a88d9 (patch) | |
| tree | a718d6a89ceb39ada3675f96f8de45c051e8ce7f /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
| parent | 889356eb719ded45c708514fb03777f705eb5934 (diff) | |
| download | bcm5719-llvm-34da6dd696439e195e7b650d97a95913101a88d9.tar.gz bcm5719-llvm-34da6dd696439e195e7b650d97a95913101a88d9.zip | |
[LV] Support vectorization of interleave-groups that require an epilogue under
optsize using masked wide loads
Under Opt for Size, the vectorizer does not vectorize interleave-groups that
have gaps at the end of the group (such as a loop that reads only the even
elements: a[2*i]) because that implies that we'll require a scalar epilogue
(which is not allowed under Opt for Size). This patch extends the support for
masked-interleave-groups (introduced by D53011 for conditional accesses) to
also cover the case of gaps in a group of loads; Targets that enable the
masked-interleave-group feature don't have to invalidate interleave-groups of
loads with gaps; they can now use masked wide-loads and shuffles (if that's
what the cost model selects).
Reviewers: Ayal, hsaito, dcaballe, fhahn
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D53668
llvm-svn: 345705
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 28 |
1 file changed, 18 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 8d8bc0b35cb..ebb8aca5fb1 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2784,11 +2784,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, - bool IsMasked) { + bool UseMaskForCond, + bool UseMaskForGaps) { - if (IsMasked) + if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, IsMasked); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); // We currently Support only fully-interleaved groups, with no gaps. // TODO: Support also strided loads (interleaved-groups with gaps). @@ -2898,11 +2900,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, - bool IsMasked) { + bool UseMaskForCond, + bool UseMaskForGaps) { - if (IsMasked) + if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, IsMasked); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); // VecTy for interleave memop is <VF*Factor x Elt>. 
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have @@ -3021,7 +3025,8 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, - bool IsMasked) { + bool UseMaskForCond, + bool UseMaskForGaps) { auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { Type *EltTy = VecTy->getVectorElementType(); if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || @@ -3033,11 +3038,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, }; if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, IsMasked); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); if (ST->hasAVX2()) return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, IsMasked); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, IsMasked); + Alignment, AddressSpace, + UseMaskForCond, UseMaskForGaps); } |

