diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86MacroFusion.cpp | 151 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.h | 1 |
5 files changed, 112 insertions, 56 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index fa8dd8a59f0..c054379acf7 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -344,6 +344,12 @@ def FeatureERMSB "ermsb", "HasERMSB", "true", "REP MOVS/STOS are fast">; +// Bulldozer and newer processors can merge CMP/TEST (but not other +// instructions) with conditional branches. +def FeatureBranchFusion + : SubtargetFeature<"branchfusion", "HasBranchFusion", "true", + "CMP/TEST can be fused with conditional branches">; + // Sandy Bridge and newer processors have many instructions that can be // fused with conditional branches and pass through the CPU as a single // operation. @@ -810,7 +816,7 @@ def ProcessorFeatures { FeatureSlowSHLD, FeatureLAHFSAHF, FeatureFast11ByteNOP, - FeatureMacroFusion]; + FeatureBranchFusion]; list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures; // PileDriver @@ -860,7 +866,7 @@ def ProcessorFeatures { FeatureLZCNT, FeatureFastBEXTR, FeatureFast15ByteNOP, - FeatureMacroFusion, + FeatureBranchFusion, FeatureMMX, FeatureMOVBE, FeatureMWAITX, diff --git a/llvm/lib/Target/X86/X86MacroFusion.cpp b/llvm/lib/Target/X86/X86MacroFusion.cpp index e5e80a2339a..a5186562229 100644 --- a/llvm/lib/Target/X86/X86MacroFusion.cpp +++ b/llvm/lib/Target/X86/X86MacroFusion.cpp @@ -18,59 +18,29 @@ using namespace llvm; -/// Check if the instr pair, FirstMI and SecondMI, should be fused -/// together. Given SecondMI, when FirstMI is unspecified, then check if -/// SecondMI may be part of a fused pair at all. -static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, - const TargetSubtargetInfo &TSI, - const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { - const X86Subtarget &ST = static_cast<const X86Subtarget&>(TSI); - // Check if this processor supports macro-fusion. - if (!ST.hasMacroFusion()) - return false; +namespace { - enum { - FuseTest, - FuseCmp, - FuseInc - } FuseKind; +// The classification for the first instruction. +enum class FirstInstrKind { Test, Cmp, And, ALU, IncDec, Invalid }; - unsigned FirstOpcode = FirstMI - ? FirstMI->getOpcode() - : static_cast<unsigned>(X86::INSTRUCTION_LIST_END); - unsigned SecondOpcode = SecondMI.getOpcode(); +// The classification for the second instruction (jump). +enum class JumpKind { + // JE, JL, JG and variants. + ELG, + // JA, JB and variants. + AB, + // JS, JP, JO and variants. + SPO, + // Not a fusable jump. + Invalid, +}; - switch (SecondOpcode) { - default: - return false; - case X86::JE_1: - case X86::JNE_1: - case X86::JL_1: - case X86::JLE_1: - case X86::JG_1: - case X86::JGE_1: - FuseKind = FuseInc; - break; - case X86::JB_1: - case X86::JBE_1: - case X86::JA_1: - case X86::JAE_1: - FuseKind = FuseCmp; - break; - case X86::JS_1: - case X86::JNS_1: - case X86::JP_1: - case X86::JNP_1: - case X86::JO_1: - case X86::JNO_1: - FuseKind = FuseTest; - break; - } +} // namespace - switch (FirstOpcode) { +static FirstInstrKind classifyFirst(const MachineInstr &MI) { + switch (MI.getOpcode()) { default: - return false; + return FirstInstrKind::Invalid; case X86::TEST8rr: case X86::TEST16rr: case X86::TEST32rr: @@ -83,6 +53,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::TEST16mr: case X86::TEST32mr: case X86::TEST64mr: + return FirstInstrKind::Test; case X86::AND16ri: case X86::AND16ri8: case X86::AND16rm: @@ -98,7 +69,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::AND8ri: case X86::AND8rm: case X86::AND8rr: - return true; + return FirstInstrKind::And; case X86::CMP16ri: case X86::CMP16ri8: case X86::CMP16rm: @@ -118,6 +89,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::CMP8rm: case X86::CMP8rr: case X86::CMP8mr: + return FirstInstrKind::Cmp; case X86::ADD16ri: case X86::ADD16ri8: case X86::ADD16ri8_DB: @@ -159,7 +131,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::SUB8ri: case X86::SUB8rm: case X86::SUB8rr: - return FuseKind == FuseCmp || FuseKind == FuseInc; + return FirstInstrKind::ALU; case X86::INC16r: case X86::INC32r: case X86::INC64r: @@ -168,10 +140,83 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, case X86::DEC32r: case X86::DEC64r: case X86::DEC8r: - return FuseKind == FuseInc; - case X86::INSTRUCTION_LIST_END: - return true; + return FirstInstrKind::IncDec; + } +} + +static JumpKind classifySecond(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return JumpKind::Invalid; + case X86::JE_1: + case X86::JNE_1: + case X86::JL_1: + case X86::JLE_1: + case X86::JG_1: + case X86::JGE_1: + return JumpKind::ELG; + case X86::JB_1: + case X86::JBE_1: + case X86::JA_1: + case X86::JAE_1: + return JumpKind::AB; + case X86::JS_1: + case X86::JNS_1: + case X86::JP_1: + case X86::JNP_1: + case X86::JO_1: + case X86::JNO_1: + return JumpKind::SPO; + } +} + +/// Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI); + + // Check if this processor supports any kind of fusion. + if (!(ST.hasBranchFusion() || ST.hasMacroFusion())) + return false; + + const JumpKind BranchKind = classifySecond(SecondMI); + + if (BranchKind == JumpKind::Invalid) + return false; // Second cannot be fused with anything. + + if (FirstMI == nullptr) + return true; // We're only checking whether Second can be fused at all. + + const FirstInstrKind TestKind = classifyFirst(*FirstMI); + + if (ST.hasBranchFusion()) { + // Branch fusion can merge CMP and TEST with all conditional jumps. + return (TestKind == FirstInstrKind::Cmp || + TestKind == FirstInstrKind::Test); + } + + if (ST.hasMacroFusion()) { + // Macro Fusion rules are a bit more complex. See Agner Fog's + // Microarchitecture table 9.2 "Instruction Fusion". + switch (TestKind) { + case FirstInstrKind::Test: + case FirstInstrKind::And: + return true; + case FirstInstrKind::Cmp: + case FirstInstrKind::ALU: + return BranchKind == JumpKind::ELG || BranchKind == JumpKind::AB; + case FirstInstrKind::IncDec: + return BranchKind == JumpKind::ELG; + case FirstInstrKind::Invalid: + return false; + } } + + llvm_unreachable("unknown branch fusion type"); } namespace llvm { diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 8d330fa6f9a..2bd36d42c5c 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -297,6 +297,9 @@ protected: /// True if the processor supports macrofusion. bool HasMacroFusion = false; + /// True if the processor supports branch fusion. + bool HasBranchFusion = false; + /// True if the processor has enhanced REP MOVSB/STOSB. bool HasERMSB = false; @@ -642,6 +645,7 @@ public: bool hasFastBEXTR() const { return HasFastBEXTR; } bool hasFastHorizontalOps() const { return HasFastHorizontalOps; } bool hasMacroFusion() const { return HasMacroFusion; } + bool hasBranchFusion() const { return HasBranchFusion; } bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 16128ceb92b..134915128d2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2984,7 +2984,7 @@ bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, } bool X86TTIImpl::canMacroFuseCmp() { - return ST->hasMacroFusion(); + return ST->hasMacroFusion() || ST->hasBranchFusion(); } bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index fc9f2763332..e9afa02e67b 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -59,6 +59,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> { X86::FeatureLEAForSP, X86::FeatureLEAUsesAG, X86::FeatureLZCNTFalseDeps, + X86::FeatureBranchFusion, X86::FeatureMacroFusion, X86::FeatureMergeToThreeWayBranch, X86::FeaturePadShortFunctions, |

