author | Dean Michael Berris <dberris@google.com> | 2017-08-23 04:49:41 +0000 |
---|---|---|
committer | Dean Michael Berris <dberris@google.com> | 2017-08-23 04:49:41 +0000 |
commit | 0884b732202d043af4755d05aeb7fd3da8951a24 (patch) | |
tree | d0c2971364fbc3858cb0f2a72cb3f2f58901e42a /llvm/lib/Target/X86/X86MCInstLower.cpp | |
parent | 71f88a955d30645d082ddd42d2215226f3ea8744 (diff) | |
download | bcm5719-llvm-0884b732202d043af4755d05aeb7fd3da8951a24.tar.gz bcm5719-llvm-0884b732202d043af4755d05aeb7fd3da8951a24.zip |
[XRay][CodeGen] Use PIC-friendly code in XRay sleds; remove synthetic references in .text
Summary:
This change achieves two things:
- Redefine the Custom Event handling instrumentation points emitted by
the compiler so that they no longer require dynamic relocation of
references to the __xray_CustomEvent trampoline (see the sketch after
this list).
- Remove the synthetic reference we emit at the end of a function,
which we used to keep auxiliary sections alive, in favour of an
SHF_LINK_ORDER association with the section where the function is
defined.
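To make the first point concrete, here is a minimal sketch, not part of this change, contrasting the two ways a sled can reach the trampoline. The old sled materializes the absolute address of `__xray_CustomEvent` in a register, which requires an `R_X86_64_64` relocation that becomes a dynamic relocation in position-independent code; the new sled is a direct `callq`, whose 32-bit PC-relative displacement the static linker resolves. The byte encodings below are from the x86-64 ISA:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Old sled body: load an absolute 64-bit address, then call indirectly.
// The 8-byte immediate needs an R_X86_64_64 relocation, which turns into
// a dynamic relocation against .text in PIC binaries.
std::vector<uint8_t> oldStyleCall() {
  return {0x48, 0xB8, 0, 0, 0, 0, 0, 0, 0, 0, // movabsq $__xray_CustomEvent, %rax
          0xFF, 0xD0};                        // callq *%rax
}

// New sled body: one direct call. The 4-byte displacement is a PC-relative
// relocation the static linker can resolve, so .text carries no dynamic
// relocations at runtime.
std::vector<uint8_t> newStyleCall() {
  return {0xE8, 0, 0, 0, 0}; // callq __xray_CustomEvent
}

int main() {
  std::printf("old: %zu bytes, new: %zu bytes\n", oldStyleCall().size(),
              newStyleCall().size());
}
```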
To achieve the custom event handling change, we've had to introduce the
concept of sled versioning -- the runtime will need to support this in
order to understand how to turn the new version of the custom event
handling sleds on and off. That runtime change has to land before we
change the way we write the sleds.
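As a rough illustration of what the runtime side of that versioning could look like; the field names and layout here are assumptions for illustration, not the actual compiler-rt definitions:

```cpp
#include <cstdint>

// Hypothetical view of a sled entry carrying a version; the real layout
// lives in the XRay runtime (compiler-rt), not in this patch.
struct SledEntry {
  uint64_t Address;  // address of the sled itself
  uint64_t Function; // entry point of the instrumented function
  uint8_t Kind;      // FUNCTION_ENTER, CUSTOM_EVENT, ...
  uint8_t Version;   // 0: original spelling, 1: PC-relative spelling
};

// The initial two-byte `jmp` skips a different number of bytes in each
// version (0x14 in the old custom event sled, 0x0f in the new one, as in
// the diff below), so patching has to dispatch on Version.
uint8_t customEventJmpDisplacement(const SledEntry &E) {
  return E.Version >= 1 ? 0x0f : 0x14;
}
```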
To remove the synthetic reference, we rely on a relatively new linker
feature that preserves the sections that are associated with each other.
This allows us to limit the effects on the .text section of ELF
binaries.
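At the ELF level the feature in question is the SHF_LINK_ORDER section flag: a section carrying it names a companion section in its sh_link field, and when the linker discards the companion (e.g. under --gc-sections), the flagged section goes with it, so no keep-alive reference from .text is needed. A small sketch of the flag itself, using glibc's <elf.h> constants and a made-up section index, illustrative only:

```cpp
#include <elf.h>
#include <cstdio>

int main() {
  // A metadata section whose lifetime is tied to its function's section:
  // sh_flags carries SHF_LINK_ORDER and sh_link names the .text section
  // it is associated with (index 42 is a made-up example).
  Elf64_Shdr MapSection{};
  MapSection.sh_flags = SHF_ALLOC | SHF_LINK_ORDER;
  MapSection.sh_link = 42;
  std::printf("link-order? %s\n",
              (MapSection.sh_flags & SHF_LINK_ORDER) ? "yes" : "no");
}
```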
Because we're still using absolute references that are resolved at
runtime for the instrumentation map (and the function index map), we mark
these sections writable. In the future we can redefine the map entries
to use relative relocations instead, which the linker can resolve
statically. That change will be a bit more invasive, so we defer it for
later.
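The deferred alternative alluded to above is the usual self-relative encoding: store each entry as the distance from the entry's own address, which the linker can compute at static link time, and add the entry's address back when reading. A sketch of the reader side; the entry layout is an assumption for illustration:

```cpp
#include <cstdint>

// A map entry stored as a self-relative offset: the linker writes
// (sled - &SledOffset) at static link time, so no runtime relocation
// (and no writable section) is needed.
struct RelativeMapEntry {
  int64_t SledOffset;
};

uintptr_t sledAddress(const RelativeMapEntry &E) {
  // Rebase: the sled lives at "address of this field + stored delta".
  return reinterpret_cast<uintptr_t>(&E.SledOffset) +
         static_cast<intptr_t>(E.SledOffset);
}
```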
Depends on D36816.
Reviewers: dblaikie, echristo, pcc
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D36615
llvm-svn: 311525
Diffstat (limited to 'llvm/lib/Target/X86/X86MCInstLower.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86MCInstLower.cpp | 73 |
1 file changed, 38 insertions, 35 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index fd2837b7910..3ee1cf710c8 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1047,20 +1047,20 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
   // We want to emit the following pattern, which follows the x86 calling
   // convention to prepare for the trampoline call to be patched in.
   //
-  // <args placement according SysV64 calling convention>
   // .p2align 1, ...
   // .Lxray_event_sled_N:
-  //   jmp +N                        // jump across the call instruction
-  //   callq __xray_CustomEvent      // force relocation to symbol
-  //   <args cleanup, jump to here>
-  //
-  // The relative jump needs to jump forward 24 bytes:
-  // 10 (args) + 5 (nops) + 9 (cleanup)
+  //   jmp +N                        // jump across the instrumentation sled
+  //   ...                           // set up arguments in register
+  //   callq __xray_CustomEvent      // force dependency to symbol
+  //   ...
+  //   <jump here>
   //
   // After patching, it would look something like:
   //
   //   nopw (2-byte nop)
+  //   ...
   //   callq __xrayCustomEvent  // already lowered
+  //   ...
   //
   // ---
   // First we emit the label and the jump.
@@ -1072,49 +1072,55 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
   // an operand (computed as an offset from the jmp instruction).
   // FIXME: Find another less hacky way do force the relative jump.
-  OutStreamer->EmitBytes("\xeb\x14");
+  OutStreamer->EmitBinaryData("\xeb\x0f");
 
   // The default C calling convention will place two arguments into %rcx and
   // %rdx -- so we only work with those.
-  unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};
-
-  // Because we will use %rax, we preserve that across the call.
-  EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
-
-  // Then we put the operands in the %rdi and %rsi registers.
+  unsigned UsedRegs[] = {X86::RDI, X86::RSI};
+  bool UsedMask[] = {false, false};
+
+  // Then we put the operands in the %rdi and %rsi registers. We spill the
+  // values in the register before we clobber them, and mark them as used in
+  // UsedMask. In case the arguments are already in the correct register, we use
+  // emit nops appropriately sized to keep the sled the same size in every
+  // situation.
   for (unsigned I = 0; I < MI.getNumOperands(); ++I)
     if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
-      if (Op->isImm())
-        EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+      assert(Op->isReg() && "Only support arguments in registers");
+      if (Op->getReg() != UsedRegs[I]) {
+        UsedMask[I] = true;
+        EmitAndCountInstruction(
+            MCInstBuilder(X86::PUSH64r).addReg(UsedRegs[I]));
+        EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
                                     .addReg(UsedRegs[I])
-                                    .addImm(Op->getImm()));
-      else if (Op->isReg()) {
-        if (Op->getReg() != UsedRegs[I])
-          EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
-                                      .addReg(UsedRegs[I])
-                                      .addReg(Op->getReg()));
-        else
-          EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
+                                    .addReg(Op->getReg()));
+      } else {
+        EmitNops(*OutStreamer, 4, Subtarget->is64Bit(), getSubtargetInfo());
       }
     }
 
   // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
-  // name of the trampoline to be implemented by the XRay runtime. We put this
-  // explicitly in the %rax register.
+  // name of the trampoline to be implemented by the XRay runtime.
   auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
   MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
-  EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
-                              .addReg(X86::RAX)
-                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
 
   // Emit the call instruction.
-  EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64pcrel32)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
 
   // Restore caller-saved and used registers.
+  for (unsigned I = sizeof UsedMask; I-- > 0;)
+    if (UsedMask[I])
+      EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(UsedRegs[I]));
+    else
+      EmitNops(*OutStreamer, 1, Subtarget->is64Bit(), getSubtargetInfo());
+
   OutStreamer->AddComment("xray custom event end.");
-  EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));
 
-  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
+  // Record the sled version. Older versions of this sled were spelled
+  // differently, so we let the runtime handle the different offsets we're
+  // using.
+  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 1);
 }
 
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
@@ -1125,7 +1131,6 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
   // .Lxray_sled_N:
   //   jmp .tmpN
   //   # 9 bytes worth of noops
-  // .tmpN
   //
   // We need the 9 bytes because at runtime, we'd be patching over the full 11
   // bytes with the following pattern:
@@ -1136,14 +1141,12 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
   auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
   OutStreamer->EmitCodeAlignment(2);
   OutStreamer->EmitLabel(CurSled);
-  auto Target = OutContext.createTempSymbol();
 
   // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
   // an operand (computed as an offset from the jmp instruction).
   // FIXME: Find another less hacky way do force the relative jump.
   OutStreamer->EmitBytes("\xeb\x09");
   EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
-  OutStreamer->EmitLabel(Target);
   recordSled(CurSled, MI, SledKind::FUNCTION_ENTER);
 }
```