diff options
| author | Craig Topper <craig.topper@intel.com> | 2017-08-29 05:14:27 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2017-08-29 05:14:27 +0000 |
| commit | 62c47a2aa50442e102252087a78219736376eb7e (patch) | |
| tree | 493d452eb2fde1762ab240ad76762e311fee9f12 | |
| parent | af6e7e2b0cf8d786ee1e488ce87f18f826c211b6 (diff) | |
| download | bcm5719-llvm-62c47a2aa50442e102252087a78219736376eb7e.tar.gz bcm5719-llvm-62c47a2aa50442e102252087a78219736376eb7e.zip | |
Mark Knights Landing as having slow two memory operand instructions
Summary: Knights Landing, because it is Atom-derived, has slow two-memory-operand instructions. Mark the Knights Landing CPU model accordingly.
Patch by David Zarzycki.
Reviewers: craig.topper
Reviewed By: craig.topper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D37224
llvm-svn: 311979
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 21 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fold-push.ll | 2 |
7 files changed, 20 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 93521a38858..37a7cdd779d 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -226,14 +226,12 @@ def FeatureCLFLUSHOPT : SubtargetFeature<"clflushopt", "HasCLFLUSHOPT", "true", "Flush A Cache Line Optimized">; def FeatureCLWB : SubtargetFeature<"clwb", "HasCLWB", "true", "Cache Line Write Back">; -// TODO: This feature ought to be renamed. -// What it really refers to are CPUs for which certain instructions -// (which ones besides the example below?) are microcoded. -// The best examples of this are the memory forms of CALL and PUSH -// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. -def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", - "CallRegIndirect", "true", - "Call register indirect">; +// On some processors, instructions that implicitly take two memory operands are +// slow. In practice, this means that CALL, PUSH, and POP with memory operands +// should be avoided in favor of a MOV + register CALL/PUSH/POP. 
+def FeatureSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", + "SlowTwoMemOps", "true", + "Two memory operand instructions are slow">; def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", @@ -401,7 +399,7 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [ FeatureLEAForSP, FeatureSlowDivide32, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureLEAUsesAG, FeaturePadShortFunctions, FeatureLAHFSAHF @@ -421,7 +419,7 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ FeaturePCLMUL, FeatureAES, FeatureSlowDivide64, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeaturePRFCHW, FeatureSlowLEA, FeatureSlowIncDec, @@ -444,7 +442,7 @@ class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [ FeaturePCLMUL, FeatureAES, FeaturePRFCHW, - FeatureCallRegIndirect, + FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, FeatureSlowBTMem, @@ -597,6 +595,7 @@ class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel, FeatureBMI, FeatureBMI2, FeatureFMA, + FeatureSlowTwoMemOps, FeatureFastPartialYMMorZMMWrite ]>; def : KnightsLandingProc<"knl">; diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 17bf3523032..2fec38d7c40 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -575,7 +575,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { if (OptLevel != CodeGenOpt::None && // Only does this when target favors doesn't favor register indirect // call. - ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || + ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. 
(Subtarget->is64Bit() || diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index b34b8f7d525..82d44410e65 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8010,13 +8010,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( unsigned Size, unsigned Align, bool AllowCommute) const { const DenseMap<unsigned, std::pair<uint16_t, uint16_t> > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = Subtarget.callRegIndirect(); + bool isSlowTwoMemOps = Subtarget.slowTwoMemOps(); bool isTwoAddrFold = false; // For CPUs that favor the register form of a call or push, // do not fold loads into calls or pushes, unless optimizing for size // aggressively. - if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + if (isSlowTwoMemOps && !MF.getFunction()->optForMinSize() && (MI.getOpcode() == X86::CALL32r || MI.getOpcode() == X86::CALL64r || MI.getOpcode() == X86::PUSH16r || MI.getOpcode() == X86::PUSH32r || MI.getOpcode() == X86::PUSH64r)) diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 2972de2e448..f20841bd02e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -912,7 +912,7 @@ let RecomputePerFunction = 1 in { def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; -def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index a3a9929d242..6ad6da95d7b 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -351,7 +351,7 @@ void 
X86Subtarget::initializeEnvironment() { HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; - CallRegIndirect = false; + SlowTwoMemOps = false; LEAUsesAG = false; SlowLEA = false; Slow3OpsLEA = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 2d9eef978df..e1e1cdfb8da 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -245,9 +245,9 @@ protected: /// a stall when returning too early. bool PadShortFunctions; - /// True if the Calls with memory reference should be converted - /// to a register-based indirect call. - bool CallRegIndirect; + /// True if two memory operand instructions should use a temporary register + /// instead. + bool SlowTwoMemOps; /// True if the LEA instruction inputs have to be ready at address generation /// (AG) time. @@ -492,7 +492,7 @@ public: bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } - bool callRegIndirect() const { return CallRegIndirect; } + bool slowTwoMemOps() const { return SlowTwoMemOps; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } bool slow3OpsLEA() const { return Slow3OpsLEA; } diff --git a/llvm/test/CodeGen/X86/fold-push.ll b/llvm/test/CodeGen/X86/fold-push.ll index 9d3afd1c449..c887b835aab 100644 --- a/llvm/test/CodeGen/X86/fold-push.ll +++ b/llvm/test/CodeGen/X86/fold-push.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=CHECK -check-prefix=NORMAL -; RUN: llc < %s -mtriple=i686-windows -mattr=call-reg-indirect | FileCheck %s -check-prefix=CHECK -check-prefix=SLM +; RUN: llc < %s -mtriple=i686-windows -mattr=slow-two-mem-ops | FileCheck %s -check-prefix=CHECK -check-prefix=SLM declare void @foo(i32 %r) |

