summary refs log tree commit diff stats
path: root/llvm/lib/Target/X86/X86Subtarget.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86Subtarget.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.cpp 12
1 files changed, 6 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 8543d189cdb..0f995404618 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -270,14 +270,13 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
- // Gather is available since Haswell (AVX2 set). So technically, we can
- // generate Gathers on all AVX2 processors. But the overhead on HSW is high.
- // Skylake Client processor has faster Gathers than HSW and performance is
- // similar to Skylake Server (AVX-512). The specified overhead is relative to
- // the Load operation. "2" is the number provided by Intel architects. This
+ // Some CPUs have more overhead for gather. The specified overhead is relative
+ // to the Load operation. "2" is the number provided by Intel architects. This
// parameter is used for cost estimation of Gather Op and comparison with
// other alternatives.
- if (X86ProcFamily == IntelSkylake || hasAVX512())
+ // TODO: Remove the explicit hasAVX512()? That would mean we would only
+ // enable gather with a -march.
+ if (hasAVX512() || (hasAVX2() && hasFastGather()))
GatherOverhead = 2;
if (hasAVX512())
ScatterOverhead = 2;
@@ -345,6 +344,7 @@ void X86Subtarget::initializeEnvironment() {
HasCmpxchg16b = false;
UseLeaForSP = false;
HasFastPartialYMMorZMMWrite = false;
+ HasFastGather = false;
HasFastScalarFSQRT = false;
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
OpenPOWER on IntegriCloud