summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author: Yunzhong Gao <Yunzhong_Gao@playstation.sony.com> 2016-02-12 23:37:57 +0000
committer: Yunzhong Gao <Yunzhong_Gao@playstation.sony.com> 2016-02-12 23:37:57 +0000
commit: 0de36ec169b8c818487606658ed2504c88f4c0e7 (patch)
tree: b7a70c0f81a344cc848172abf3d4384b0e6712d9 /llvm/lib/Target
parent: 76fbdeb7d5a65dcb220bc0d73cfb8dd66293fde5 (diff)
download: bcm5719-llvm-0de36ec169b8c818487606658ed2504c88f4c0e7.tar.gz
download: bcm5719-llvm-0de36ec169b8c818487606658ed2504c88f4c0e7.zip
Disable the vzeroupper insertion pass on PS4.
Differential Revision: http://reviews.llvm.org/D16837
llvm-svn: 260764
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86.td            8
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.cpp  1
-rw-r--r--  llvm/lib/Target/X86/X86Subtarget.h    5
-rw-r--r--  llvm/lib/Target/X86/X86VZeroUpper.cpp 2
4 files changed, 14 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3c25f7327e3..527c38b9186 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -239,6 +239,11 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
+// On at least some AMD processors, there is no performance hazard to writing
+// only the lower parts of a YMM register without clearing the upper part.
+def FeatureFastPartialYMMWrite
+ : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
+ "true", "Partial writes to YMM registers are fast">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -596,7 +601,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFastPartialYMMWrite
]>;
// Bulldozer
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index cca25ec7563..6e72a4e4a61 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -285,6 +285,7 @@ void X86Subtarget::initializeEnvironment() {
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
+ HasFastPartialYMMWrite = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index d355ca310ea..501770ca0d0 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -189,6 +189,10 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// True if there is no performance penalty to writing only the lower parts
+ /// of a YMM register without clearing the upper part.
+ bool HasFastPartialYMMWrite;
+
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
@@ -421,6 +425,7 @@ public:
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 6925b272b4a..cd822b08361 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -248,7 +248,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasAVX512())
+ if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
OpenPOWER on IntegriCloud