summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
diff options
context:
space:
mode:
authorMohammed Agabaria <mohammed.agabaria@intel.com>2017-11-20 08:18:12 +0000
committerMohammed Agabaria <mohammed.agabaria@intel.com>2017-11-20 08:18:12 +0000
commit115f68ea3eb74d5b1cd6e84e55554b149ca80a61 (patch)
tree7780ba2a9b1537de0f2e7779946a60ca110f2644 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent198f7d78d39dc587f83924a82cf2aac3df0ffba3 (diff)
downloadbcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.tar.gz
bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.zip
[LV][X86] Support of AVX2 Gathers code generation and update the LV with this
This patch depends on: https://reviews.llvm.org/D35348
Support of pattern selection of masked gathers of AVX2 (X86\AVX2 code gen).
Update LoopVectorize to generate gathers for AVX2 processors.
Reviewers: delena, zvi, RKSimon, craig.topper, aaboud, igorb
Reviewed By: delena, RKSimon
Differential Revision: https://reviews.llvm.org/D35772
llvm-svn: 318641
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp19
1 files changed, 13 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index e4505b29e6c..9328afc93e2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2368,8 +2368,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
// Trying to reduce IndexSize to 32 bits for vector 16.
// By default the IndexSize is equal to pointer size.
- unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) :
- DL.getPointerSizeInBits();
+ unsigned IndexSize = (ST->hasAVX512() && VF >= 16)
+ ? getIndexSizeInBits(Ptr, DL)
+ : DL.getPointerSizeInBits();
Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
IndexSize), VF);
@@ -2385,7 +2386,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
// The gather / scatter cost is given by Intel architects. It is a rough
// number since we are looking at one instruction in a time.
- const int GSOverhead = 2;
+ const int GSOverhead = (Opcode == Instruction::Load)
+ ? ST->getGatherOverhead()
+ : ST->getScatterOverhead();
return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
Alignment, AddressSpace);
}
@@ -2456,7 +2459,7 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
// the mask vector will add more instructions. Right now we give the scalar
// cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction
// is better in the VariableMask case.
- if (VF == 2 || (VF == 4 && !ST->hasVLX()))
+ if (ST->hasAVX512() && (VF == 2 || (VF == 4 && !ST->hasVLX())))
Scalarize = true;
if (Scalarize)
@@ -2515,11 +2518,15 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
int DataWidth = isa<PointerType>(ScalarTy) ?
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
- // AVX-512 allows gather and scatter
- return (DataWidth == 32 || DataWidth == 64) && ST->hasAVX512();
+ // AVX-512 and Skylake AVX2 allows gather and scatter
+ return (DataWidth == 32 || DataWidth == 64) && (ST->hasAVX512() ||
+ ST->getProcFamily() == X86Subtarget::IntelSkylake);
}
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
+ // AVX2 doesn't support scatter
+ if (!ST->hasAVX512())
+ return false;
return isLegalMaskedGather(DataType);
}
OpenPOWER on IntegriCloud