summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
author: Dorit Nuzman <dorit.nuzman@intel.com>, 2018-10-31 09:57:56 +0000
committer: Dorit Nuzman <dorit.nuzman@intel.com>, 2018-10-31 09:57:56 +0000
commit: 34da6dd696439e195e7b650d97a95913101a88d9 (patch)
tree: a718d6a89ceb39ada3675f96f8de45c051e8ce7f /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 889356eb719ded45c708514fb03777f705eb5934 (diff)
downloadbcm5719-llvm-34da6dd696439e195e7b650d97a95913101a88d9.tar.gz
bcm5719-llvm-34da6dd696439e195e7b650d97a95913101a88d9.zip
[LV] Support vectorization of interleave-groups that require an epilog under optsize using masked wide loads

Under Opt for Size, the vectorizer does not vectorize interleave-groups that have gaps at the end of the group (such as a loop that reads only the even elements: a[2*i]) because that implies that we'll require a scalar epilogue (which is not allowed under Opt for Size).

This patch extends the support for masked-interleave-groups (introduced by D53011 for conditional accesses) to also cover the case of gaps in a group of loads; targets that enable the masked-interleave-group feature don't have to invalidate interleave-groups of loads with gaps; they could now use masked wide-loads and shuffles (if that's what the cost model selects).

Reviewers: Ayal, hsaito, dcaballe, fhahn
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D53668
llvm-svn: 345705
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 28
1 files changed, 18 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 8d8bc0b35cb..ebb8aca5fb1 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2784,11 +2784,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace,
- bool IsMasked) {
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
- if (IsMasked)
+ if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, IsMasked);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// We currently Support only fully-interleaved groups, with no gaps.
// TODO: Support also strided loads (interleaved-groups with gaps).
@@ -2898,11 +2900,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace,
- bool IsMasked) {
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
- if (IsMasked)
+ if (UseMaskForCond || UseMaskForGaps)
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, IsMasked);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
// VecTy for interleave memop is <VF*Factor x Elt>.
// So, for VF=4, Interleave Factor = 3, Element type = i32 we have
@@ -3021,7 +3025,8 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
ArrayRef<unsigned> Indices,
unsigned Alignment,
unsigned AddressSpace,
- bool IsMasked) {
+ bool UseMaskForCond,
+ bool UseMaskForGaps) {
auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) {
Type *EltTy = VecTy->getVectorElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
@@ -3033,11 +3038,14 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
};
if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, IsMasked);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
if (ST->hasAVX2())
return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, IsMasked);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace, IsMasked);
+ Alignment, AddressSpace,
+ UseMaskForCond, UseMaskForGaps);
}
OpenPOWER on IntegriCloud