diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 73 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 19 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 |
4 files changed, 31 insertions, 66 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 2e7902a6064..03b5a6064c9 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -95,8 +95,6 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true", def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true", "64-bit with cmpxchg16b", [Feature64Bit]>; -def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", - "Bit testing of memory is slow">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", @@ -336,7 +334,7 @@ def : Proc<"pentium2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, def : Proc<"pentium3", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE1, FeatureFXSR]>; // Enable the PostRAScheduler for SSE2 and SSE3 class cpus. // The intent is to enable it for pentium4 which is the current default @@ -350,7 +348,7 @@ def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX, def : ProcessorModel<"pentium-m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE2, FeatureFXSR]>; def : ProcessorModel<"pentium4", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, @@ -358,7 +356,7 @@ def : ProcessorModel<"pentium4", GenericPostRAModel, def : ProcessorModel<"pentium4m", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, - FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>; + FeatureSSE2, FeatureFXSR]>; // Intel Quark. def : Proc<"lakemont", []>; @@ -366,20 +364,19 @@ def : Proc<"lakemont", []>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureSlowBTMem]>; + FeatureFXSR]>; // NetBurst. def : ProcessorModel<"prescott", GenericPostRAModel, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, - FeatureFXSR, FeatureSlowBTMem]>; + FeatureFXSR]>; def : ProcessorModel<"nocona", GenericPostRAModel, [ FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, - FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureCMPXCHG16B ]>; // Intel Core 2 Solo/Duo. @@ -390,7 +387,6 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureLAHFSAHF, FeatureMacroFusion ]>; @@ -401,7 +397,6 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureLAHFSAHF, FeatureMacroFusion ]>; @@ -416,7 +411,6 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [ FeatureFXSR, FeatureCMPXCHG16B, FeatureMOVBE, - FeatureSlowBTMem, FeatureLEAForSP, FeatureSlowDivide32, FeatureSlowDivide64, @@ -444,7 +438,6 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [ FeaturePRFCHW, FeatureSlowLEA, FeatureSlowIncDec, - FeatureSlowBTMem, FeatureSlowPMULLD, FeatureLAHFSAHF ]>; @@ -466,7 +459,6 @@ class GoldmontProc<string Name> : ProcessorModel<Name, SLMModel, [ FeatureSlowTwoMemOps, FeatureSlowLEA, FeatureSlowIncDec, - FeatureSlowBTMem, FeatureLAHFSAHF, FeatureMPX, FeatureSHA, @@ -488,7 +480,6 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ FeatureSSE42, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeaturePOPCNT, FeatureLAHFSAHF, FeatureMacroFusion @@ -504,7 +495,6 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [ FeatureSSE42, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeaturePOPCNT, FeatureAES, FeaturePCLMUL, @@ -547,7 +537,6 @@ def SNBFeatures : ProcessorFeatures<[], [ class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel, SNBFeatures.Value, [ - FeatureSlowBTMem, FeatureSlowUAMem32 ]>; def : SandyBridgeProc<"sandybridge">; @@ -561,7 +550,6 @@ def IVBFeatures : ProcessorFeatures<SNBFeatures.Value, [ class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel, IVBFeatures.Value, [ - FeatureSlowBTMem, FeatureSlowUAMem32 ]>; def : IvyBridgeProc<"ivybridge">; @@ -579,8 +567,7 @@ def HSWFeatures : ProcessorFeatures<IVBFeatures.Value, [ class HaswellProc<string Name> : ProcModel<Name, HaswellModel, HSWFeatures.Value, [ - ProcIntelHSW, - FeatureSlowBTMem + ProcIntelHSW ]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. @@ -591,8 +578,7 @@ def BDWFeatures : ProcessorFeatures<HSWFeatures.Value, [ ]>; class BroadwellProc<string Name> : ProcModel<Name, HaswellModel, BDWFeatures.Value, [ - ProcIntelBDW, - FeatureSlowBTMem + ProcIntelBDW ]>; def : BroadwellProc<"broadwell">; @@ -608,8 +594,7 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [ class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel, SKLFeatures.Value, [ - ProcIntelSKL, - FeatureSlowBTMem + ProcIntelSKL ]>; def : SkylakeClientProc<"skylake">; @@ -632,7 +617,6 @@ def KNLFeatures : ProcessorFeatures<IVBFeatures.Value, [ class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel, KNLFeatures.Value, [ ProcIntelKNL, - FeatureSlowBTMem, FeatureSlowTwoMemOps, FeatureFastPartialYMMorZMMWrite ]>; @@ -641,7 +625,6 @@ def : KnightsLandingProc<"knl">; class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel, KNLFeatures.Value, [ ProcIntelKNL, - FeatureSlowBTMem, FeatureSlowTwoMemOps, FeatureFastPartialYMMorZMMWrite ]>; @@ -659,8 +642,7 @@ def SKXFeatures : ProcessorFeatures<SKLFeatures.Value, [ class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel, SKXFeatures.Value, [ - ProcIntelSKX, - FeatureSlowBTMem + ProcIntelSKX ]>; def : SkylakeServerProc<"skylake-avx512">; def : SkylakeServerProc<"skx">; // Legacy alias. @@ -673,8 +655,7 @@ def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [ class CannonlakeProc<string Name> : ProcModel<Name, HaswellModel, CNLFeatures.Value, [ - ProcIntelCNL, - FeatureSlowBTMem + ProcIntelCNL ]>; def : CannonlakeProc<"cannonlake">; @@ -684,46 +665,43 @@ def : Proc<"k6", [FeatureX87, FeatureSlowUAMem16, FeatureMMX]>; def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>; def : Proc<"athlon", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon-tbird", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"athlon-xp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, - FeatureSlowSHLD]>; + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"athlon-mp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1, - Feature3DNowA, FeatureFXSR, FeatureSlowBTMem, - FeatureSlowSHLD]>; + Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>; def : Proc<"k8", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"opteron", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon64", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon-fx", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, FeatureFXSR, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"k8-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"opteron-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"athlon64-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowSHLD]>; def : Proc<"amdfam10", [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD, + FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF]>; def : Proc<"barcelona", [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD, + FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF]>; // Bobcat @@ -929,7 +907,6 @@ def : ProcessorModel<"x86-64", SandyBridgeModel, [ FeatureFXSR, Feature64Bit, FeatureSlow3OpsLEA, - FeatureSlowBTMem, FeatureSlowIncDec, FeatureMacroFusion ]>; diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index d5e2caa8f28..2cde6c02e95 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -904,7 +904,6 @@ let RecomputePerFunction = 1 in { "MF->getFunction()->optForSize()">; } -def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">; def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">; def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; @@ -1672,25 +1671,20 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's // perspective, this is pretty bizarre. Make these instructions disassembly -// only for now. +// only for now. These instructions are also slow on modern CPUs so that's +// another reason to avoid generating them. let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in { def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi16 addr:$src1), GR16:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize16, TB, Requires<[FastBTMem]>, NotMemoryFoldable; + >, OpSize16, TB, NotMemoryFoldable; def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi32 addr:$src1), GR32:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR - >, OpSize32, TB, Requires<[FastBTMem]>, NotMemoryFoldable; + >, OpSize32, TB, NotMemoryFoldable; def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - // [(X86bt (loadi64 addr:$src1), GR64:$src2), - // (implicit EFLAGS)] [], IIC_BT_MR >, TB, NotMemoryFoldable; } @@ -1710,9 +1704,8 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), IIC_BT_RI>, TB; } // SchedRW -// Note that these instructions don't need FastBTMem because that -// only applies when the other operand is in a register. When it's -// an immediate, bt is still fast. +// Note that these instructions aren't slow because that only applies when the +// other operand is in a register. When it's an immediate, bt is still fast. let SchedRW = [WriteALU] in { def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 13062ca8cfe..0de5619cff2 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -345,7 +345,6 @@ void X86Subtarget::initializeEnvironment() { HasSGX = false; HasCLFLUSHOPT = false; HasCLWB = false; - IsBTMemSlow = false; IsPMULLDSlow = false; IsSHLDSlow = false; IsUAMem16Slow = false; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 5816e860e01..d866ecc2ef9 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -193,9 +193,6 @@ protected: /// Processor has Prefetch with intent to Write instruction bool HasPFPREFETCHWT1; - /// True if BT (bit test) of memory instructions are slow. - bool IsBTMemSlow; - /// True if SHLD instructions are slow. bool IsSHLDSlow; @@ -489,7 +486,6 @@ public: bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } bool hasCLZERO() const { return HasCLZERO; } - bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isPMULLDSlow() const { return IsPMULLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } |

