[LV][X86] Support of AVX2 Gathers code generation and update the LV with this

This patch depends on: https://reviews.llvm.org/D35348
Support of pattern selection of masked gathers of AVX2 (X86\AVX2 code gen).
Update LoopVectorize to generate gathers for AVX2 processors.
Reviewers: delena, zvi, RKSimon, craig.topper, aaboud, igorb
Reviewed By: delena, RKSimon
Differential Revision: https://reviews.llvm.org/D35772
llvm-svn: 318641
author: Mohammed Agabaria <mohammed.agabaria@intel.com> 2017-11-20 08:18:12 +0000
committer: Mohammed Agabaria <mohammed.agabaria@intel.com> 2017-11-20 08:18:12 +0000
commit: 115f68ea3eb74d5b1cd6e84e55554b149ca80a61 (patch)
tree: 7780ba2a9b1537de0f2e7779946a60ca110f2644 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent: 198f7d78d39dc587f83924a82cf2aac3df0ffba3 (diff)
download: bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.tar.gz
bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.zip
1 files changed, 13 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index e4505b29e6c..9328afc93e2 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2368,8 +2368,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
 
   // Trying to reduce IndexSize to 32 bits for vector 16.
   // By default the IndexSize is equal to pointer size.
-  unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) :
-    DL.getPointerSizeInBits();
+  unsigned IndexSize = (ST->hasAVX512() && VF >= 16)
+                           ? getIndexSizeInBits(Ptr, DL)
+                           : DL.getPointerSizeInBits();
 
   Type *IndexVTy = VectorType::get(IntegerType::get(SrcVTy->getContext(),
                                                     IndexSize), VF);
@@ -2385,7 +2386,9 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr,
 
   // The gather / scatter cost is given by Intel architects. It is a rough
   // number since we are looking at one instruction in a time.
-  const int GSOverhead = 2;
+  const int GSOverhead = (Opcode == Instruction::Load)
+                             ? ST->getGatherOverhead()
+                             : ST->getScatterOverhead();
   return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
                                            Alignment, AddressSpace);
 }
@@ -2456,7 +2459,7 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
   // the mask vector will add more instructions. Right now we give the scalar
   // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction
   // is better in the VariableMask case.
-  if (VF == 2 || (VF == 4 && !ST->hasVLX()))
+  if (ST->hasAVX512() && (VF == 2 || (VF == 4 && !ST->hasVLX())))
     Scalarize = true;
 
   if (Scalarize)
@@ -2515,11 +2518,15 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
   int DataWidth = isa<PointerType>(ScalarTy) ?
     DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
 
-  // AVX-512 allows gather and scatter
-  return (DataWidth == 32 || DataWidth == 64) && ST->hasAVX512();
+  // AVX-512 and Skylake AVX2 allow gathers; scatters also require AVX-512.
+  return (DataWidth == 32 || DataWidth == 64) && (ST->hasAVX512() ||
+      ST->getProcFamily() == X86Subtarget::IntelSkylake);
 }
 
 bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
+  // AVX2 doesn't support scatter
+  if (!ST->hasAVX512())
+    return false;
   return isLegalMaskedGather(DataType);
 }
author	Mohammed Agabaria <mohammed.agabaria@intel.com>	2017-11-20 08:18:12 +0000
committer	Mohammed Agabaria <mohammed.agabaria@intel.com>	2017-11-20 08:18:12 +0000
commit	115f68ea3eb74d5b1cd6e84e55554b149ca80a61 (patch)
tree	7780ba2a9b1537de0f2e7779946a60ca110f2644 /llvm/lib/Target/X86/X86TargetTransformInfo.cpp
parent	198f7d78d39dc587f83924a82cf2aac3df0ffba3 (diff)
download	bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.tar.gz bcm5719-llvm-115f68ea3eb74d5b1cd6e84e55554b149ca80a61.zip