 llvm/lib/Target/X86/X86ISelLowering.cpp            |  6
 llvm/lib/Target/X86/X86InstrCompiler.td            |  9
 llvm/test/CodeGen/X86/cxx_tlscc64.ll               |  2
 llvm/test/CodeGen/X86/i386-tlscall-fastregalloc.ll | 11
 4 files changed, 18 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4c9aac5953a..03de28ed6b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12952,9 +12952,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL);
     SDValue Args[] = { Chain, Offset };
     Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
-    Chain =
-        DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
-                           DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
+    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+                               DAG.getIntPtrConstant(0, DL, true),
+                               Chain.getValue(1), DL);
 
     // TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index faf0f816b5e..8feea124451 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -490,10 +490,13 @@ def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                 [(X86TLSCall addr:$sym)]>,
                 Requires<[Not64BitMode]>;
 
-// For x86_64, the address of the thunk is passed in %rdi, on return
-// the address of the variable is in %rax. All other registers are preserved.
+// For x86_64, the address of the thunk is passed in %rdi, but the
+// pseudo directly use the symbol, so do not add an implicit use of
+// %rdi. The lowering will do the right thing with RDI.
+// On return the address of the variable is in %rax. All other
+// registers are preserved.
 let Defs = [RAX, EFLAGS],
-    Uses = [RSP, RDI],
+    Uses = [RSP],
     usesCustomInserter = 1 in
 def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                   "# TLSCall_64",
diff --git a/llvm/test/CodeGen/X86/cxx_tlscc64.ll b/llvm/test/CodeGen/X86/cxx_tlscc64.ll
index 1915f221c50..ef947367c09 100644
--- a/llvm/test/CodeGen/X86/cxx_tlscc64.ll
+++ b/llvm/test/CodeGen/X86/cxx_tlscc64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=x86_64-apple-darwin | FileCheck %s
 ; TLS function were wrongly model and after fixing that, shrink-wrapping
 ; cannot help here. To achieve the expected lowering, we need to playing
 ; tricks similar to AArch64 fast TLS calling convention (r255821).
diff --git a/llvm/test/CodeGen/X86/i386-tlscall-fastregalloc.ll b/llvm/test/CodeGen/X86/i386-tlscall-fastregalloc.ll
index 775c0c1b378..86f6f5872d0 100644
--- a/llvm/test/CodeGen/X86/i386-tlscall-fastregalloc.ll
+++ b/llvm/test/CodeGen/X86/i386-tlscall-fastregalloc.ll
@@ -10,15 +10,20 @@ target triple = "i386-apple-macosx10.10"
 ; PR26485.
 ;
 ; CHECK-LABEL: f:
+; Get c.
+; C is spilled because of the scheduling of the instructions,
+; but a smarter regalloc wouldn't have spilled it.
+; CHECK: movl L_c{{[^,]*}}, [[C_ADDR:%[a-z]+]]
+; CHECK-NEXT: movl [[C_ADDR]], [[C_SPILLED:[0-8]+\(%esp\)]]
 ; Get p.
-; CHECK: movl _p@{{[0-9a-zA-Z]+}}, [[P_ADDR:%[a-z]+]]
+; CHECK-NEXT: movl _p@{{[0-9a-zA-Z]+}}, [[P_ADDR:%[a-z]+]]
 ; CHECK-NEXT: calll *([[P_ADDR]])
 ; At this point eax contiains the address of p.
 ; Load c address.
 ; Make sure we do not clobber eax.
-; CHECK-NEXT: movl L_c{{[^,]*}}, [[C_ADDR:%e[b-z]x+]]
+; CHECK-NEXT: movl [[C_SPILLED]], [[C_ADDR_RELOADED:%e[b-z]x+]]
 ; Store c address into p.
-; CHECK-NEXT: movl [[C_ADDR]], (%eax)
+; CHECK-NEXT: movl [[C_ADDR_RELOADED]], (%eax)
 define void @f() #0 {
 entry:
   store i8* @c, i8** @p, align 4
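
For reference, a minimal sketch of the call-sequence pattern the X86ISelLowering.cpp hunk ends up building. It simply mirrors the lines shown in that hunk; Offset, NodeTys, and DL are values already in scope in LowerGlobalTLSAddress, and the comments are editorial, not the author's.

    // Darwin TLS call: CALLSEQ_START, the TLSCALL pseudo, then CALLSEQ_END
    // glued to the call so the stack adjustments stay attached to it.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL);
    SDValue Args[] = { Chain, Offset };
    Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
    // NodeTys is (MVT::Other, MVT::Glue), so Chain.getValue(1) is the glue
    // produced by the TLSCALL node; passing it here instead of an empty
    // SDValue() is what ties CALLSEQ_END to the call.
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                               DAG.getIntPtrConstant(0, DL, true),
                               Chain.getValue(1), DL);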

