diff options
author | Mohammed Agabaria <mohammed.agabaria@intel.com> | 2017-11-20 08:18:12 +0000 |
---|---|---|
committer | Mohammed Agabaria <mohammed.agabaria@intel.com> | 2017-11-20 08:18:12 +0000 |
commit | 115f68ea3eb74d5b1cd6e84e55554b149ca80a61 (patch) | |
tree | 7780ba2a9b1537de0f2e7779946a60ca110f2644 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp | |
parent | 198f7d78d39dc587f83924a82cf2aac3df0ffba3 (diff) | |
download | bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.tar.gz bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.zip |
[LV][X86] Support of AVX2 Gathers code generation and update the LV with this
This patch depends on: https://reviews.llvm.org/D35348
Support of pattern selection of masked gathers of AVX2 (X86\AVX2 code gen)
Update LoopVectorize to generate gathers for AVX2 processors.
Reviewers: delena, zvi, RKSimon, craig.topper, aaboud, igorb
Reviewed By: delena, RKSimon
Differential Revision: https://reviews.llvm.org/D35772
llvm-svn: 318641
Diffstat (limited to 'llvm/lib/Target/X86/X86TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 19 |
1 files changed, 13 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index e4505b29e6c..9328afc93e2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2368,8 +2368,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, // Trying to reduce IndexSize to 32 bits for vector 16. // By default the IndexSize is equal to pointer size. - unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) : - DL.getPointerSizeInBits(); + unsigned IndexSize = (ST->hasAVX512() && VF >= 16) + ? getIndexSizeInBits(Ptr, DL) + : DL.getPointerSizeInBits(); Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(), IndexSize), VF); @@ -2385,7 +2386,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, // The gather / scatter cost is given by Intel architects. It is a rough // number since we are looking at one instruction in a time. - const int GSOverhead = 2; + const int GSOverhead = (Opcode == Instruction::Load) + ? ST->getGatherOverhead() + : ST->getScatterOverhead(); return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), Alignment, AddressSpace); } @@ -2456,7 +2459,7 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, // the mask vector will add more instructions. Right now we give the scalar // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction // is better in the VariableMask case. - if (VF == 2 || (VF == 4 && !ST->hasVLX())) + if (ST->hasAVX512() && (VF == 2 || (VF == 4 && !ST->hasVLX()))) Scalarize = true; if (Scalarize) @@ -2515,11 +2518,15 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) { int DataWidth = isa<PointerType>(ScalarTy) ? DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); - // AVX-512 allows gather and scatter - return (DataWidth == 32 || DataWidth == 64) && ST->hasAVX512(); + // AVX-512 and Skylake AVX2 allows gather and scatter + return (DataWidth == 32 || DataWidth == 64) && (ST->hasAVX512() || + ST->getProcFamily() == X86Subtarget::IntelSkylake); } bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) { + // AVX2 doesn't support scatter + if (!ST->hasAVX512()) + return false; return isLegalMaskedGather(DataType); } |