-rw-r--r--  llvm/include/llvm/MC/MCAsmBackend.h                           |  4
-rw-r--r--  llvm/include/llvm/MC/MCStreamer.h                             | 10
-rw-r--r--  llvm/lib/MC/MCAsmStreamer.cpp                                 |  2
-rw-r--r--  llvm/lib/MC/MCObjectStreamer.cpp                              |  5
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp            | 10
-rw-r--r--  llvm/lib/Target/X86/X86MCInstLower.cpp                        | 42
-rw-r--r--  llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll  | 36
-rw-r--r--  llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll   | 89
8 files changed, 195 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 279c2300565..ed7d5c7f01f 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -46,6 +46,10 @@ public:
const support::endianness Endian;
+ /// Return true if this target might automatically pad instructions and thus
+ /// need to emit padding enable/disable directives around sensitive code.
+ virtual bool allowAutoPadding() const { return false; }
+
/// Give the target a chance to manipulate state related to instruction
/// alignment (e.g. padding for optimization) before and after actually
/// emitting the instruction.
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index aab511a1e45..ba1649d33d1 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -223,6 +223,13 @@ class MCStreamer {
bool UseAssemblerInfoForParsing;
+ /// Is the assembler allowed to insert padding automatically? For
+ /// correctness reasons, we sometimes need to ensure instructions aren't
+ /// separated in unexpected ways. At the moment, this feature is only
+ /// usable from an integrated assembler, but assembly syntax is under
+ /// discussion for future inclusion.
+ bool AllowAutoPadding = false;
+
protected:
MCStreamer(MCContext &Ctx);
@@ -267,6 +274,9 @@ public:
return TargetStreamer.get();
}
+ void setAllowAutoPadding(bool v) { AllowAutoPadding = v; }
+ bool getAllowAutoPadding() const { return AllowAutoPadding; }
+
/// When emitting an object file, create and emit a real label. When emitting
/// textual assembly, this should do nothing to avoid polluting our output.
virtual MCSymbol *EmitCFILabel();
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index d220c650b8e..5d369503995 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -77,6 +77,8 @@ public:
assert(InstPrinter);
if (IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
+ if (Assembler->getBackendPtr())
+ setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
}
MCAssembler &getAssembler() { return *Assembler; }
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index d8a7ab043eb..3d1358df475 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -29,7 +29,10 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context,
: MCStreamer(Context),
Assembler(std::make_unique<MCAssembler>(
Context, std::move(TAB), std::move(Emitter), std::move(OW))),
- EmitEHFrame(true), EmitDebugFrame(false) {}
+ EmitEHFrame(true), EmitDebugFrame(false) {
+ if (Assembler->getBackendPtr())
+ setAllowAutoPadding(Assembler->getBackend().allowAutoPadding());
+}
MCObjectStreamer::~MCObjectStreamer() {}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 623aff088b1..85e24dc98f5 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -155,6 +155,7 @@ public:
AlignBranchType = X86AlignBranchKindLoc;
}
+ bool allowAutoPadding() const override;
void alignBranchesBegin(MCObjectStreamer &OS, const MCInst &Inst) override;
void alignBranchesEnd(MCObjectStreamer &OS, const MCInst &Inst) override;
@@ -410,10 +411,15 @@ static bool hasVariantSymbol(const MCInst &MI) {
return false;
}
+bool X86AsmBackend::allowAutoPadding() const {
+ return (AlignBoundary != Align::None() &&
+ AlignBranchType != X86::AlignBranchNone);
+}
+
bool X86AsmBackend::needAlign(MCObjectStreamer &OS) const {
- if (AlignBoundary == Align::None() ||
- AlignBranchType == X86::AlignBranchNone)
+ if (!OS.getAllowAutoPadding())
return false;
+ assert(allowAutoPadding() && "incorrect initialization!");
MCAssembler &Assembler = OS.getAssembler();
MCSection *Sec = OS.getCurrentSectionOnly();
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index e6b6bd70faf..3cb911563c2 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1142,10 +1142,35 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit,
}
}
+/// A RAII helper which defines a region of instructions which can't have
+/// padding added between them for correctness.
+struct NoAutoPaddingScope {
+ MCStreamer &OS;
+ const bool OldAllowAutoPadding;
+ NoAutoPaddingScope(MCStreamer &OS)
+ : OS(OS), OldAllowAutoPadding(OS.getAllowAutoPadding()) {
+ changeAndComment(false);
+ }
+ ~NoAutoPaddingScope() {
+ changeAndComment(OldAllowAutoPadding);
+ }
+ void changeAndComment(bool b) {
+ if (b == OS.getAllowAutoPadding())
+ return;
+ OS.setAllowAutoPadding(b);
+ if (b)
+ OS.emitRawComment("autopadding");
+ else
+ OS.emitRawComment("noautopadding");
+ }
+};
+
void X86AsmPrinter::LowerSTATEPOINT(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "Statepoint currently only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
StatepointOpers SOpers(&MI);
if (unsigned PatchBytes = SOpers.getNumPatchBytes()) {
EmitNops(*OutStreamer, PatchBytes, Subtarget->is64Bit(),
@@ -1207,6 +1232,8 @@ void X86AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI,
// FAULTING_LOAD_OP <def>, <faulting type>, <MBB handler>,
// <opcode>, <operands>
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
Register DefRegister = FaultingMI.getOperand(0).getReg();
FaultMaps::FaultKind FK =
static_cast<FaultMaps::FaultKind>(FaultingMI.getOperand(1).getImm());
@@ -1254,6 +1281,8 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// PATCHABLE_OP minsize, opcode, operands
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
unsigned MinSize = MI.getOperand(0).getImm();
unsigned Opcode = MI.getOperand(1).getImm();
@@ -1309,6 +1338,8 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
SMShadowTracker.emitShadowPadding(*OutStreamer, getSubtargetInfo());
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
auto &Ctx = OutStreamer->getContext();
MCSymbol *MILabel = Ctx.createTempSymbol();
OutStreamer->EmitLabel(MILabel);
@@ -1368,6 +1399,8 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay custom events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1462,6 +1495,8 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
assert(Subtarget->is64Bit() && "XRay typed events only supports X86-64");
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern, which follows the x86 calling
// convention to prepare for the trampoline call to be patched in.
//
@@ -1559,6 +1594,9 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// We want to emit the following pattern:
//
// .p2align 1, ...
@@ -1586,6 +1624,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Since PATCHABLE_RET takes the opcode of the return statement as an
// argument, we use that to emit the correct form of the RET that we want.
// i.e. when we see this:
@@ -1616,6 +1656,8 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI,
X86MCInstLower &MCIL) {
+ NoAutoPaddingScope NoPadScope(*OutStreamer);
+
// Like PATCHABLE_RET, we have the actual instruction in the operands to this
// instruction so we lower that particular instruction and its operands.
// Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
diff --git a/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll b/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
new file mode 100644
index 00000000000..bebbea199be
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-branch-boundary-noautopadding.ll
@@ -0,0 +1,36 @@
+; RUN: llc -verify-machineinstrs -O3 -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call -filetype=obj < %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+;; This file is a companion to align-branch-boundary-suppressions.ll.
+;; It exists to demonstrate that suppressions are actually wired into the
+;; integrated assembler.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK: 1: callq
+; CHECK-NEXT: 6: callq
+; CHECK-NEXT: b: callq
+; CHECK-NEXT: 10: callq
+; CHECK-NEXT: 15: callq
+; CHECK-NEXT: 1a: callq
+; CHECK-NEXT: 1f: callq
+entry:
+ ; Each of these will be 5 bytes, pushing the statepoint to offset=30.
+ ; For a normal call, this would force padding between the last normal
+ ; call and the safepoint, but since we've suppressed alignment that won't
+ ; happen for the safepoint. That's non-ideal; we'd really prefer to do
+ ; the alignment and just keep the label with the statepoint call. (TODO)
+ call void @foo()
+ call void @foo()
+ call void @foo()
+ call void @foo()
+ call void @foo()
+ call void @foo()
+ call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ ret void
+}
+
+declare void @foo()
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
diff --git a/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
new file mode 100644
index 00000000000..7db20187d45
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-branch-boundary-suppressions.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 -enable-implicit-null-checks -mcpu=skylake -x86-align-branch-boundary=32 -x86-align-branch=call+jmp+indirect+ret+jcc < %s | FileCheck %s
+
+;; The tests in this file check that various constructs which need to disable
+;; prefix and/or nop padding do so in the right places. However, since we
+;; don't yet have assembler syntax for this, they're only able to check
+;; comments and must hope the assembler does the right thing.
+
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+; If we have autopadding enabled, make sure the label isn't separated from
+; the mov.
+define i32 @implicit_null_check(i32* %x) {
+; CHECK-LABEL: implicit_null_check:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #noautopadding
+; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: movl (%rdi), %eax # on-fault: .LBB0_1
+; CHECK-NEXT: #autopadding
+; CHECK-NEXT: # %bb.2: # %not_null
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %is_null
+; CHECK-NEXT: movl $42, %eax
+; CHECK-NEXT: retq
+
+ entry:
+ %c = icmp eq i32* %x, null
+ br i1 %c, label %is_null, label %not_null, !make.implicit !{}
+
+ is_null:
+ ret i32 42
+
+ not_null:
+ %t = load atomic i32, i32* %x unordered, align 4
+ ret i32 %t
+}
+
+; Label must bind to call before
+define void @test_statepoint(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test_statepoint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: #noautopadding
+; CHECK-NEXT: callq return_i1
+; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: #autopadding
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: retq
+entry:
+ call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0)
+ ret void
+}
+
+declare zeroext i1 @return_i1()
+declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
+
+
+; Label must bind to following nop sequence
+define void @patchpoint(i64 %a, i64 %b) {
+; CHECK-LABEL: patchpoint:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movq %rsp, %rbp
+; CHECK-NEXT: .cfi_def_cfa_register %rbp
+; CHECK-NEXT: #noautopadding
+; CHECK-NEXT: .Ltmp2:
+; CHECK-NEXT: .byte 102
+; CHECK-NEXT: .byte 102
+; CHECK-NEXT: .byte 102
+; CHECK-NEXT: .byte 102
+; CHECK-NEXT: .byte 102
+; CHECK-NEXT: nopw %cs:512(%rax,%rax)
+; CHECK-NEXT: #autopadding
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: .cfi_def_cfa %rsp, 8
+; CHECK-NEXT: retq
+entry:
+ call void (i64, i32, i8*, i32, ...) @llvm.experimental.patchpoint.void(i64 4, i32 15, i8* null, i32 0, i64 %a, i64 %b)
+ ret void
+}
+
+
+declare void @llvm.experimental.stackmap(i64, i32, ...)
+declare void @llvm.experimental.patchpoint.void(i64, i32, i8*, i32, ...)
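
For reference, the usage pattern this patch enables is not limited to the X86 lowerings touched above. The following is a minimal sketch, assuming the patch is applied so that MCStreamer exposes getAllowAutoPadding()/setAllowAutoPadding(); the guard mirrors the NoAutoPaddingScope helper added in X86MCInstLower.cpp, and lowerWithLabel is a hypothetical caller for illustration, not code from the patch.

```cpp
// Minimal sketch, assuming this patch is applied so that MCStreamer exposes
// getAllowAutoPadding()/setAllowAutoPadding(). SuppressPaddingScope mirrors
// the NoAutoPaddingScope helper from X86MCInstLower.cpp; lowerWithLabel is a
// hypothetical caller, not part of the patch.
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"

namespace {

// RAII: suppress automatic prefix/NOP padding for a run of instructions,
// then restore whatever setting was in effect before.
struct SuppressPaddingScope {
  llvm::MCStreamer &OS;
  const bool OldAllow;
  explicit SuppressPaddingScope(llvm::MCStreamer &OS)
      : OS(OS), OldAllow(OS.getAllowAutoPadding()) {
    OS.setAllowAutoPadding(false);
  }
  ~SuppressPaddingScope() { OS.setAllowAutoPadding(OldAllow); }
};

// A label that describes the next instruction (fault map, statepoint, ...)
// must not be separated from that instruction by assembler-inserted padding.
void lowerWithLabel(llvm::MCStreamer &OS, const llvm::MCInst &Inst,
                    const llvm::MCSubtargetInfo &STI) {
  SuppressPaddingScope Guard(OS);
  llvm::MCSymbol *Label = OS.getContext().createTempSymbol();
  OS.EmitLabel(Label);
  OS.EmitInstruction(Inst, STI);
} // padding is re-enabled (if it was on) when Guard goes out of scope

} // end anonymous namespace
```

The design choice here is that the streamer carries the current padding policy, initialized from MCAsmBackend::allowAutoPadding(), so per-instruction suppression is just a save/clear/restore of one flag rather than a change to every emit call site.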