6 files changed, 132 insertions, 29 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index f42e626a9c9..a28d4eac839 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -991,11 +991,13 @@ bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
 
   bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP;
 
-  // Only do this address mode folding for 64-bit if we're in the small code
-  // model.
-  // FIXME: But we can do GOTPCREL addressing in the medium code model.
+  // We can't use an addressing mode in the 64-bit large code model. In the
+  // medium code model, we use can use an mode when RIP wrappers are present.
+  // That signifies access to globals that are known to be "near", such as the
+  // GOT itself.
   CodeModel::Model M = TM.getCodeModel();
-  if (Subtarget->is64Bit() && M != CodeModel::Small && M != CodeModel::Kernel)
+  if (Subtarget->is64Bit() &&
+      (M == CodeModel::Large || (M == CodeModel::Medium && !IsRIPRel)))
     return true;
 
   // Base and index reg must be 0 in order to use %rip as base.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index f360c0a6a63..373f8502037 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -37,6 +37,10 @@ let hasSideEffects = 0, isNotDuplicable = 1, Uses = [ESP, SSP],
   def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
                       "", []>;
 
+// 64-bit large code model PIC base construction.
+let hasSideEffects = 0, mayLoad = 1, isNotDuplicable = 1, SchedRW = [WriteJump] in
+  def MOVGOT64r : PseudoI<(outs GR64:$reg),
+                          (ins GR64:$scratch, i64i32imm_pcrel:$got), []>;
 
 // ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
 // a stack adjustment and the codegen must know that they may modify the stack
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index ee0a6e270a3..c5d10fc133a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5851,7 +5851,9 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
 /// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
 ///
 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
-  assert(!Subtarget.is64Bit() &&
+  assert((!Subtarget.is64Bit() ||
+          MF->getTarget().getCodeModel() == CodeModel::Medium ||
+          MF->getTarget().getCodeModel() == CodeModel::Large) &&
          "X86-64 PIC uses RIP relative addressing");
 
   X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
@@ -5862,7 +5864,8 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   // Create the register. The code to initialize it is inserted
   // later, by the CGBR pass (below).
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  GlobalBaseReg = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass);
+  GlobalBaseReg = RegInfo.createVirtualRegister(
+      Subtarget.is64Bit() ? &X86::GR64_NOSPRegClass : &X86::GR32_NOSPRegClass);
   X86FI->setGlobalBaseReg(GlobalBaseReg);
   return GlobalBaseReg;
 }
@@ -7321,9 +7324,10 @@ namespace {
         static_cast<const X86TargetMachine *>(&MF.getTarget());
       const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
 
-      // Don't do anything if this is 64-bit as 64-bit PIC
-      // uses RIP relative addressing.
-      if (STI.is64Bit())
+      // Don't do anything in the 64-bit small and kernel code models. They use
+      // RIP-relative addressing for everything.
+      if (STI.is64Bit() && (TM->getCodeModel() == CodeModel::Small ||
+                            TM->getCodeModel() == CodeModel::Kernel))
         return false;
 
       // Only emit a global base reg in PIC mode.
@@ -7350,17 +7354,41 @@ namespace {
       else
         PC = GlobalBaseReg;
 
-      // Operand of MovePCtoStack is completely ignored by asm printer. It's
-      // only used in JIT code emission as displacement to pc.
-      BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
-      // If we're using vanilla 'GOT' PIC style, we should use relative addressing
-      // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
-      if (STI.isPICStyleGOT()) {
-        // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
-        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-          .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
-                                        X86II::MO_GOT_ABSOLUTE_ADDRESS);
+      if (STI.is64Bit()) {
+        if (TM->getCodeModel() == CodeModel::Medium) {
+          // In the medium code model, use a RIP-relative LEA to materialize the
+          // GOT.
+          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::LEA64r), PC)
+              .addReg(X86::RIP)
+              .addImm(0)
+              .addReg(0)
+              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_")
+              .addReg(0);
+        } else if (TM->getCodeModel() == CodeModel::Large) {
+          // Loading the GOT in the large code model requires math with labels,
+          // so we use a pseudo instruction and expand it during MC emission.
+          unsigned Scratch = RegInfo.createVirtualRegister(&X86::GR64RegClass);
+          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVGOT64r), PC)
+              .addReg(Scratch, RegState::Undef | RegState::Define)
+              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+        } else {
+          llvm_unreachable("unexpected code model");
+        }
+      } else {
+        // Operand of MovePCtoStack is completely ignored by asm printer. It's
+        // only used in JIT code emission as displacement to pc.
+        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+        // If we're using vanilla 'GOT' PIC style, we should use relative
+        // addressing not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+        if (STI.isPICStyleGOT()) {
+          // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel],
+          // %some_register
+          BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+              .addReg(PC)
+              .addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+                                 X86II::MO_GOT_ABSOLUTE_ADDRESS);
+        }
       }
 
       return true;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 9ca5de6c30b..d38c7b49796 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1720,6 +1720,41 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case X86::MOVGOT64r: {
+    // Materializes the GOT for the 64-bit large code model.
+    MCSymbol *DotSym = OutContext.createTempSymbol();
+    OutStreamer->EmitLabel(DotSym);
+
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned ScratchReg = MI->getOperand(1).getReg();
+    MCSymbol *GOTSym = MCInstLowering.GetSymbolFromOperand(MI->getOperand(2));
+
+    // .LtmpN: leaq .LtmpN(%rip), %dst
+    const MCExpr *DotExpr = MCSymbolRefExpr::create(DotSym, OutContext);
+    EmitAndCountInstruction(MCInstBuilder(X86::LEA64r)
+                                .addReg(DstReg)   // dest
+                                .addReg(X86::RIP) // base
+                                .addImm(1)        // scale
+                                .addReg(0)        // index
+                                .addExpr(DotExpr) // disp
+                                .addReg(0));      // seg
+
+    // movq $_GLOBAL_OFFSET_TABLE_ - .LtmpN, %scratch
+    const MCExpr *GOTSymExpr = MCSymbolRefExpr::create(GOTSym, OutContext);
+    const MCExpr *GOTDiffExpr =
+        MCBinaryExpr::createSub(GOTSymExpr, DotExpr, OutContext);
+    EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+                                .addReg(ScratchReg)     // dest
+                                .addExpr(GOTDiffExpr)); // disp
+
+    // addq %scratch, %dst
+    EmitAndCountInstruction(MCInstBuilder(X86::ADD64rr)
+                                .addReg(DstReg)       // dest
+                                .addReg(DstReg)       // dest
+                                .addReg(ScratchReg)); // src
+    return;
+  }
+
   case X86::ADD32ri: {
     // Lower the MO_GOT_ABSOLUTE_ADDRESS form of ADD32ri.
     if (MI->getOperand(2).getTargetFlags() != X86II::MO_GOT_ABSOLUTE_ADDRESS)
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 8e1e35eebba..7e84323dda4 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -68,14 +68,36 @@ X86Subtarget::classifyGlobalReference(const GlobalValue *GV) const {
 
 unsigned char
 X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
-  // 64 bits can use %rip addressing for anything local.
-  if (is64Bit())
+  // If we're not PIC, it's not very interesting.
+  if (!isPositionIndependent())
     return X86II::MO_NO_FLAG;
 
-  // If this is for a position dependent executable, the static linker can
-  // figure it out.
-  if (!isPositionIndependent())
+  if (is64Bit()) {
+    // 64-bit ELF PIC local references may use GOTOFF relocations.
+    if (isTargetELF()) {
+      switch (TM.getCodeModel()) {
+      // 64-bit small code model is simple: All rip-relative.
+      case CodeModel::Small:
+      case CodeModel::Kernel:
+        return X86II::MO_NO_FLAG;
+
+      // The large PIC code model uses GOTOFF.
+      case CodeModel::Large:
+        return X86II::MO_GOTOFF;
+
+      // Medium is a hybrid: RIP-rel for code, GOTOFF for DSO local data.
+      case CodeModel::Medium:
+        if (isa<Function>(GV))
+          return X86II::MO_NO_FLAG; // All code is RIP-relative
+        return X86II::MO_GOTOFF;    // Local symbols use GOTOFF.
+      }
+      llvm_unreachable("invalid code model");
+    }
+
+    // Otherwise, this is either a RIP-relative reference or a 64-bit movabsq,
+    // both of which use MO_NO_FLAG.
     return X86II::MO_NO_FLAG;
+  }
 
   // The COFF dynamic linker just patches the executable sections.
   if (isTargetCOFF())
@@ -97,8 +119,8 @@ X86Subtarget::classifyLocalReference(const GlobalValue *GV) const {
 
 unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
                                                     const Module &M) const {
-  // Large model never uses stubs.
-  if (TM.getCodeModel() == CodeModel::Large)
+  // The static large model never uses stubs.
+  if (TM.getCodeModel() == CodeModel::Large && !isPositionIndependent())
     return X86II::MO_NO_FLAG;
 
   // Absolute symbols can be referenced directly.
@@ -120,8 +142,14 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
   if (isTargetCOFF())
     return X86II::MO_DLLIMPORT;
 
-  if (is64Bit())
+  if (is64Bit()) {
+    // ELF supports a large, truly PIC code model with non-PC relative GOT
+    // references. Other object file formats do not. Use the no-flag, 64-bit
+    // reference for them.
+    if (TM.getCodeModel() == CodeModel::Large)
+      return isTargetELF() ? X86II::MO_GOT : X86II::MO_NO_FLAG;
     return X86II::MO_GOTPCREL;
+  }
 
   if (isTargetDarwin()) {
     if (!isPositionIndependent())
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index b76e8c20409..374bf3daaf9 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -160,9 +160,15 @@ static std::string computeDataLayout(const Triple &TT) {
 }
 
 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+                                           bool JIT,
                                            Optional<Reloc::Model> RM) {
   bool is64Bit = TT.getArch() == Triple::x86_64;
   if (!RM.hasValue()) {
+    // JIT codegen should use static relocations by default, since it's
+    // typically executed in process and not relocatable.
+    if (JIT)
+      return Reloc::Static;
+
     // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
     // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
     // use static relocation model by default.
@@ -214,7 +220,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
                                    CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(
           T, computeDataLayout(TT), TT, CPU, FS, Options,
-          getEffectiveRelocModel(TT, RM),
+          getEffectiveRelocModel(TT, JIT, RM),
           getEffectiveCodeModel(CM, JIT, TT.getArch() == Triple::x86_64), OL),
       TLOF(createTLOF(getTargetTriple())) {
   // Windows stack unwinder gets confused when execution flow "falls through"