summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86.td58
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp3
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp29
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td7
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp10
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h4
-rw-r--r--llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll72
-rw-r--r--llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll2
8 files changed, 136 insertions, 49 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 7d9f396c1e9..dc5ab1bf65d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -182,6 +182,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
"Support RDSEED instruction">;
+def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true",
+ "Support LAHF and SAHF instructions">;
def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true",
"Support MPX instructions">;
def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
@@ -273,7 +275,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureSSSE3,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
]>;
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureSlowUAMem16,
@@ -281,7 +284,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureSSE41,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
]>;
// Atom CPUs.
@@ -299,7 +303,8 @@ class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
FeatureSlowDivide64,
FeatureCallRegIndirect,
FeatureLEAUsesAG,
- FeaturePadShortFunctions
+ FeaturePadShortFunctions,
+ FeatureLAHFSAHF
]>;
def : BonnellProc<"bonnell">;
def : BonnellProc<"atom">; // Pin the generic name to the baseline.
@@ -319,7 +324,8 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
FeaturePRFCHW,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem
+ FeatureSlowBTMem,
+ FeatureLAHFSAHF
]>;
def : SilvermontProc<"silvermont">;
def : SilvermontProc<"slm">; // Legacy alias.
@@ -331,7 +337,8 @@ class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureSlowBTMem,
- FeaturePOPCNT
+ FeaturePOPCNT,
+ FeatureLAHFSAHF
]>;
def : NehalemProc<"nehalem">;
def : NehalemProc<"corei7">;
@@ -346,7 +353,8 @@ class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
- FeaturePCLMUL
+ FeaturePCLMUL,
+ FeatureLAHFSAHF
]>;
def : WestmereProc<"westmere">;
@@ -363,7 +371,8 @@ class SandyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureAES,
FeaturePCLMUL,
FeatureXSAVE,
- FeatureXSAVEOPT
+ FeatureXSAVEOPT,
+ FeatureLAHFSAHF
]>;
def : SandyBridgeProc<"sandybridge">;
def : SandyBridgeProc<"corei7-avx">; // Legacy alias.
@@ -382,7 +391,8 @@ class IvyBridgeProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureXSAVEOPT,
FeatureRDRAND,
FeatureF16C,
- FeatureFSGSBase
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
]>;
def : IvyBridgeProc<"ivybridge">;
def : IvyBridgeProc<"core-avx-i">; // Legacy alias.
@@ -408,7 +418,8 @@ class HaswellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureFMA,
FeatureRTM,
FeatureHLE,
- FeatureSlowIncDec
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
@@ -436,7 +447,8 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureHLE,
FeatureADX,
FeatureRDSEED,
- FeatureSlowIncDec
+ FeatureSlowIncDec,
+ FeatureLAHFSAHF
]>;
def : BroadwellProc<"broadwell">;
@@ -465,7 +477,8 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureRTM,
FeatureHLE,
FeatureSlowIncDec,
- FeatureMPX
+ FeatureMPX,
+ FeatureLAHFSAHF
]>;
def : KnightsLandingProc<"knl">;
@@ -500,7 +513,8 @@ class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
FeatureSlowIncDec,
FeatureMPX,
FeatureXSAVEC,
- FeatureXSAVES
+ FeatureXSAVES,
+ FeatureLAHFSAHF
]>;
def : SkylakeProc<"skylake">;
def : SkylakeProc<"skx">; // Legacy alias.
@@ -547,7 +561,7 @@ def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureSlowBTMem, FeatureSlowSHLD]>;
def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>;
// Bobcat
def : Proc<"btver1", [
@@ -560,7 +574,8 @@ def : Proc<"btver1", [
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
- FeatureSlowSHLD
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
]>;
// Jaguar
@@ -580,7 +595,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeaturePOPCNT,
FeatureXSAVE,
FeatureXSAVEOPT,
- FeatureSlowSHLD
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
]>;
// Bulldozer
@@ -598,7 +614,8 @@ def : Proc<"bdver1", [
FeatureLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
- FeatureSlowSHLD
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
]>;
// Piledriver
def : Proc<"bdver2", [
@@ -619,7 +636,8 @@ def : Proc<"bdver2", [
FeatureBMI,
FeatureTBM,
FeatureFMA,
- FeatureSlowSHLD
+ FeatureSlowSHLD,
+ FeatureLAHFSAHF
]>;
// Steamroller
@@ -643,7 +661,8 @@ def : Proc<"bdver3", [
FeatureFMA,
FeatureXSAVEOPT,
FeatureSlowSHLD,
- FeatureFSGSBase
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
]>;
// Excavator
@@ -666,7 +685,8 @@ def : Proc<"bdver4", [
FeatureTBM,
FeatureFMA,
FeatureXSAVEOPT,
- FeatureFSGSBase
+ FeatureFSGSBase,
+ FeatureLAHFSAHF
]>;
def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2cf1d4ba30e..c07bca8fe52 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -13930,6 +13930,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW,
DAG.getConstant(8, dl, MVT::i8));
SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl);
+
+ // Some 64-bit targets lack SAHF support, but they do support FCOMI.
+ assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?");
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e9d36f8ce2f..ebe329064c5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -4385,7 +4385,32 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
int Reg = FromEFLAGS ? DestReg : SrcReg;
bool is32 = X86::GR32RegClass.contains(Reg);
bool is64 = X86::GR64RegClass.contains(Reg);
+
if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) {
+ int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
+ int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
+ int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32;
+ int Pop = is64 ? X86::POP64r : X86::POP32r;
+ int PopF = is64 ? X86::POPF64 : X86::POPF32;
+ int AX = is64 ? X86::RAX : X86::EAX;
+
+ if (!Subtarget.hasLAHFSAHF()) {
+ assert(is64 && "Not having LAHF/SAHF only happens on 64-bit.");
+ // Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - clobbersTheStack.
+ if (FromEFLAGS) {
+ BuildMI(MBB, MI, DL, get(PushF));
+ BuildMI(MBB, MI, DL, get(Pop), DestReg);
+ }
+ if (ToEFLAGS) {
+ BuildMI(MBB, MI, DL, get(Push))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(PopF));
+ }
+ return;
+ }
+
// The flags need to be saved, but saving EFLAGS with PUSHF/POPF is
// inefficient. Instead:
// - Save the overflow flag OF into AL using SETO, and restore it using a
@@ -4407,10 +4432,6 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Notice that we have to adjust the stack if we don't want to clobber the
// first frame index. See X86FrameLowering.cpp - clobbersTheStack.
- int Mov = is64 ? X86::MOV64rr : X86::MOV32rr;
- int Push = is64 ? X86::PUSH64r : X86::PUSH32r;
- int Pop = is64 ? X86::POP64r : X86::POP32r;
- int AX = is64 ? X86::RAX : X86::EAX;
bool AXDead = (Reg == AX);
// FIXME: The above could figure out that AX is dead in more cases with:
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 1e66739026e..1c21a098bc6 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -799,6 +799,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -1502,10 +1503,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
let SchedRW = [WriteALU] in {
let Defs = [EFLAGS], Uses = [AH] in
def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf",
- [(set EFLAGS, (X86sahf AH))], IIC_AHF>;
+ [(set EFLAGS, (X86sahf AH))], IIC_AHF>,
+ Requires<[HasLAHFSAHF]>;
let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [],
- IIC_AHF>; // AH = flags
+ IIC_AHF>, // AH = flags
+ Requires<[HasLAHFSAHF]>;
} // SchedRW
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 44a46b7e07a..f90a0b0d04f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -189,6 +189,15 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit,+sse2";
}
+ // LAHF/SAHF are always supported in non-64-bit mode.
+ if (!In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+sahf," + FullFS;
+ else
+ FullFS = "+sahf";
+ }
+
+
// Parse features string and set the CPU.
ParseSubtargetFeatures(CPUName, FullFS);
@@ -264,6 +273,7 @@ void X86Subtarget::initializeEnvironment() {
HasSHA = false;
HasPRFCHW = false;
HasRDSEED = false;
+ HasLAHFSAHF = false;
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 353b4f7f5eb..83bc640976a 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -152,6 +152,9 @@ protected:
/// Processor has RDSEED instructions.
bool HasRDSEED;
+ /// Processor has LAHF/SAHF instructions.
+ bool HasLAHFSAHF;
+
/// True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
@@ -374,6 +377,7 @@ public:
bool hasSHA() const { return HasSHA; }
bool hasPRFCHW() const { return HasPRFCHW; }
bool hasRDSEED() const { return HasRDSEED; }
+ bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
diff --git a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index 791edba89c4..c294dee4013 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -1,7 +1,11 @@
; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -check-prefix=i386
; RUN: llc -mtriple=i386-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=i386f
+
; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s -check-prefix=x8664
; RUN: llc -mtriple=x86_64-linux-gnu -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s -check-prefix=x8664-sahf
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s -check-prefix=x8664-sahf
+; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=corei7 %s -o - | FileCheck %s -check-prefix=x8664-sahf
; FIXME: X86InstrInfo::copyPhysReg had code which figured out whether AX was
; live or not to avoid save / restore when it's not needed. See FIXME in
@@ -56,21 +60,31 @@ define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) {
; x8664-LABEL: test_intervening_call:
; x8664: cmpxchgq
-; x8664: pushq %rax
-; x8664-NEXT: seto %al
-; x8664-NEXT: lahf
-; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
-; x8664-NEXT: popq %rax
+; x8664: pushfq
+; x8664-NEXT: popq [[FLAGS:%.*]]
; x8664-NEXT: movq %rax, %rdi
; x8664-NEXT: callq bar
+; x8664-NEXT: pushq [[FLAGS]]
+; x8664-NEXT: popfq
+; x8664-NEXT: jne
+
+; x8664-sahf-LABEL: test_intervening_call:
+; x8664-sahf: cmpxchgq
+; x8664-sahf: pushq %rax
+; x8664-sahf-NEXT: seto %al
+; x8664-sahf-NEXT: lahf
+; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-sahf-NEXT: popq %rax
+; x8664-sahf-NEXT: movq %rax, %rdi
+; x8664-sahf-NEXT: callq bar
; ** FIXME Next line isn't actually necessary. **
-; x8664-NEXT: pushq %rax
-; x8664-NEXT: movq [[FLAGS]], %rax
-; x8664-NEXT: addb $127, %al
-; x8664-NEXT: sahf
+; x8664-sahf-NEXT: pushq %rax
+; x8664-sahf-NEXT: movq [[FLAGS]], %rax
+; x8664-sahf-NEXT: addb $127, %al
+; x8664-sahf-NEXT: sahf
; ** FIXME Next line isn't actually necessary. **
-; x8664-NEXT: popq %rax
-; x8664-NEXT: jne
+; x8664-sahf-NEXT: popq %rax
+; x8664-sahf-NEXT: jne
%cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
%v = extractvalue { i64, i1 } %cx, 0
@@ -99,6 +113,10 @@ define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) {
; x8664: cmpxchg
; x8664-NEXT: jne
+; x8664-sahf-LABEL: test_control_flow:
+; x8664-sahf: cmpxchg
+; x8664-sahf-NEXT: jne
+
entry:
%cmp = icmp sgt i32 %i, %j
br i1 %cmp, label %loop_start, label %cond.end
@@ -165,20 +183,28 @@ define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) {
; i386f-NEXT: popl %eax
; x8664-LABEL: test_feed_cmov:
-; x8664: cmpxchgl
+; x8664: cmpxchg
+; x8664: pushfq
+; x8664-NEXT: popq [[FLAGS:%.*]]
+; x8664-NEXT: callq foo
+; x8664-NEXT: pushq [[FLAGS]]
+; x8664-NEXT: popfq
+
+; x8664-sahf-LABEL: test_feed_cmov:
+; x8664-sahf: cmpxchgl
; ** FIXME Next line isn't actually necessary. **
-; x8664: pushq %rax
-; x8664: seto %al
-; x8664-NEXT: lahf
-; x8664-NEXT: movq %rax, [[FLAGS:%.*]]
+; x8664-sahf: pushq %rax
+; x8664-sahf: seto %al
+; x8664-sahf-NEXT: lahf
+; x8664-sahf-NEXT: movq %rax, [[FLAGS:%.*]]
; ** FIXME Next line isn't actually necessary. **
-; x8664-NEXT: popq %rax
-; x8664-NEXT: callq foo
-; x8664-NEXT: pushq %rax
-; x8664-NEXT: movq [[FLAGS]], %rax
-; x8664-NEXT: addb $127, %al
-; x8664-NEXT: sahf
-; x8664-NEXT: popq %rax
+; x8664-sahf-NEXT: popq %rax
+; x8664-sahf-NEXT: callq foo
+; x8664-sahf-NEXT: pushq %rax
+; x8664-sahf-NEXT: movq [[FLAGS]], %rax
+; x8664-sahf-NEXT: addb $127, %al
+; x8664-sahf-NEXT: sahf
+; x8664-sahf-NEXT: popq %rax
%res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
%success = extractvalue { i32, i1 } %res, 1
diff --git a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
index 891a925611c..a8df33454e9 100644
--- a/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ b/llvm/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -1,5 +1,5 @@
; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s
-; RUN: llc -mtriple=x86_64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s
; FIXME Add -verify-machineinstrs back when PR24535 is fixed.
OpenPOWER on IntegriCloud