diff options
| author | Yunzhong Gao <Yunzhong_Gao@playstation.sony.com> | 2016-02-12 23:37:57 +0000 |
|---|---|---|
| committer | Yunzhong Gao <Yunzhong_Gao@playstation.sony.com> | 2016-02-12 23:37:57 +0000 |
| commit | 0de36ec169b8c818487606658ed2504c88f4c0e7 (patch) | |
| tree | b7a70c0f81a344cc848172abf3d4384b0e6712d9 /llvm/lib/Target | |
| parent | 76fbdeb7d5a65dcb220bc0d73cfb8dd66293fde5 (diff) | |
| download | bcm5719-llvm-0de36ec169b8c818487606658ed2504c88f4c0e7.tar.gz bcm5719-llvm-0de36ec169b8c818487606658ed2504c88f4c0e7.zip | |
Disable the vzeroupper insertion pass on PS4.
Differential Revision: http://reviews.llvm.org/D16837
llvm-svn: 260764
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86VZeroUpper.cpp | 2 |
4 files changed, 14 insertions, 2 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 3c25f7327e3..527c38b9186 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -239,6 +239,11 @@ def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true",
 def FeatureSoftFloat
     : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
                        "Use software floating point features.">;
+// On at least some AMD processors, there is no performance hazard to writing
+// only the lower parts of a YMM register without clearing the upper part.
+def FeatureFastPartialYMMWrite
+    : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
+                       "true", "Partial writes to YMM registers are fast">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -596,7 +601,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [
   FeatureXSAVE,
   FeatureXSAVEOPT,
   FeatureSlowSHLD,
-  FeatureLAHFSAHF
+  FeatureLAHFSAHF,
+  FeatureFastPartialYMMWrite
 ]>;
 
 // Bulldozer
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index cca25ec7563..6e72a4e4a61 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -285,6 +285,7 @@ void X86Subtarget::initializeEnvironment() {
   HasSSEUnalignedMem = false;
   HasCmpxchg16b = false;
   UseLeaForSP = false;
+  HasFastPartialYMMWrite = false;
   HasSlowDivide32 = false;
   HasSlowDivide64 = false;
   PadShortFunctions = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index d355ca310ea..501770ca0d0 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -189,6 +189,10 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
+  /// True if there is no performance penalty to writing only the lower parts
+  /// of a YMM register without clearing the upper part.
+  bool HasFastPartialYMMWrite;
+
   /// True if 8-bit divisions are significantly faster than
   /// 32-bit divisions and should be used when possible.
   bool HasSlowDivide32;
@@ -421,6 +425,7 @@ public:
   bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
+  bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
   bool hasSlowDivide32() const { return HasSlowDivide32; }
   bool hasSlowDivide64() const { return HasSlowDivide64; }
   bool padShortFunctions() const { return PadShortFunctions; }
diff --git a/llvm/lib/Target/X86/X86VZeroUpper.cpp b/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 6925b272b4a..cd822b08361 100644
--- a/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -248,7 +248,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
 /// vzeroupper instructions before function calls.
 bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
-  if (!ST.hasAVX() || ST.hasAVX512())
+  if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
     return false;
   TII = ST.getInstrInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
```

