author     Dean Michael Berris <dberris@google.com>    2017-04-20 03:26:04 +0000
committer  Dean Michael Berris <dberris@google.com>    2017-04-20 03:26:04 +0000
commit     9df8ef55384ee0d5cce3d2a8b1e86d7ba6d07b94 (patch)
tree       9e4ca457edc204052a8479f40c265eff4ad2fc14
parent     fe56405467fa8cfdd5438f1f83690988045dcce4 (diff)
[XRay][compiler-rt] Cleanup CFI/CFA annotations on trampolines
Summary:
This is a follow-up to D32202. While the previous change (D32202) did fix
the stack alignment issue, the CFI/CFA directives were still in a weird
state, as the offsets were wrong. This change cleans up the SAVE/RESTORE
macros for the trampolines, accounting for the stack-pointer adjustments
with fewer instructions and clearer math. Note that the offsets differ on
the exit trampolines, because we don't typically 'call' into these
trampolines and only ever jump into them (i.e. they are treated as tail
calls that are patched in at runtime).

Reviewers: eugenis, kpw, pelikan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D32214

llvm-svn: 300815
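To make the offset arithmetic concrete (a minimal standalone sketch, not
part of the commit; the cfa_demo label is hypothetical): on x86-64 the
call into the entry trampoline pushes an 8-byte return address, so the CFA
(canonical frame address) sits at %rsp + 8 on entry; after subtracting 192
it sits at %rsp + 200, which is exactly what the new .cfi_def_cfa_offset
200 annotation records.

	.text
	.globl	cfa_demo
	.type	cfa_demo, @function
cfa_demo:
	.cfi_startproc			# on entry: CFA = %rsp + 8 (return address)
	subq	$192, %rsp		# reserve the spill area in one instruction
	.cfi_def_cfa_offset 200		# CFA = %rsp + 192 + 8 = %rsp + 200
	# ... spill and later restore registers here ...
	addq	$192, %rsp		# release the spill area
	.cfi_def_cfa_offset 8		# back to CFA = %rsp + 8
	retq
	.cfi_endproc
	.size	cfa_demo, .-cfa_demo

Note that the total adjustment is unchanged by the patch: the old pushq
%rbp (8 bytes) plus subq $184 also moved %rsp down by 192 bytes in total.
The patch folds the push into a single subq and stores %rbp with a movq
instead, which is where the "fewer instructions and clearer math" comes
from, while leaving the alignment behavior for the subsequent callq *%rax
intact.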
-rw-r--r--  compiler-rt/lib/xray/xray_trampoline_x86_64.S | 33
1 file changed, 14 insertions(+), 19 deletions(-)
diff --git a/compiler-rt/lib/xray/xray_trampoline_x86_64.S b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
index 772eafbecca..847ecef8d42 100644
--- a/compiler-rt/lib/xray/xray_trampoline_x86_64.S
+++ b/compiler-rt/lib/xray/xray_trampoline_x86_64.S
@@ -16,7 +16,12 @@
#include "../builtins/assembly.h"
.macro SAVE_REGISTERS
- subq $184, %rsp
+ subq $192, %rsp
+ .cfi_def_cfa_offset 200
+ // At this point, the stack pointer should be aligned to an 8-byte boundary,
+ // because any call instructions that come after this will add another 8
+ // bytes and therefore align it to 16-bytes.
+ movq %rbp, 184(%rsp)
movupd %xmm0, 168(%rsp)
movupd %xmm1, 152(%rsp)
movupd %xmm2, 136(%rsp)
@@ -35,6 +40,7 @@
.endm
.macro RESTORE_REGISTERS
+ movq 184(%rsp), %rbp
movupd 168(%rsp), %xmm0
movupd 152(%rsp), %xmm1
movupd 136(%rsp), %xmm2
@@ -50,7 +56,8 @@
movq 16(%rsp), %rcx
movq 8(%rsp), %r8
movq 0(%rsp), %r9
- addq $184, %rsp
+ addq $192, %rsp
+ .cfi_def_cfa_offset 8
.endm
.text
@@ -64,10 +71,7 @@
__xray_FunctionEntry:
.cfi_startproc
- pushq %rbp
- .cfi_def_cfa_offset 16
SAVE_REGISTERS
- .cfi_def_cfa_offset 200
// This load has to be atomic, it's concurrent with __xray_patch().
// On x86/amd64, a simple (type-aligned) MOV instruction is enough.
@@ -81,7 +85,6 @@ __xray_FunctionEntry:
callq *%rax
.Ltmp0:
RESTORE_REGISTERS
- popq %rbp
retq
.Ltmp1:
.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
@@ -97,10 +100,9 @@ __xray_FunctionExit:
// Save the important registers first. Since we're assuming that this
// function is only jumped into, we only preserve the registers for
// returning.
- pushq %rbp
- .cfi_def_cfa_offset 16
- subq $48, %rsp
+ subq $56, %rsp
.cfi_def_cfa_offset 64
+ movq %rbp, 48(%rsp)
movupd %xmm0, 32(%rsp)
movupd %xmm1, 16(%rsp)
movq %rax, 8(%rsp)
@@ -114,12 +116,13 @@ __xray_FunctionExit:
callq *%rax
.Ltmp2:
// Restore the important registers.
+ movq 48(%rsp), %rbp
movupd 32(%rsp), %xmm0
movupd 16(%rsp), %xmm1
movq 8(%rsp), %rax
movq 0(%rsp), %rdx
- addq $48, %rsp
- popq %rbp
+ addq $56, %rsp
+ .cfi_def_cfa_offset 8
retq
.Ltmp3:
.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
@@ -136,10 +139,7 @@ __xray_FunctionTailExit:
// this is an exit. In the future, we will introduce a new entry type that
// differentiates between a normal exit and a tail exit, but we'd have to do
// this and increment the version number for the header.
- pushq %rbp
- .cfi_def_cfa_offset 16
SAVE_REGISTERS
- .cfi_def_cfa_offset 200
movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax
testq %rax,%rax
@@ -151,7 +151,6 @@ __xray_FunctionTailExit:
.Ltmp4:
RESTORE_REGISTERS
- popq %rbp
retq
.Ltmp5:
.size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
@@ -164,10 +163,7 @@ __xray_FunctionTailExit:
.type __xray_ArgLoggerEntry,@function
__xray_ArgLoggerEntry:
.cfi_startproc
- pushq %rbp
- .cfi_def_cfa_offset 16
SAVE_REGISTERS
- .cfi_def_cfa_offset 200
// Again, these function pointer loads must be atomic; MOV is fine.
movq _ZN6__xray13XRayArgLoggerE(%rip), %rax
@@ -187,7 +183,6 @@ __xray_ArgLoggerEntry:
.Larg1entryFail:
RESTORE_REGISTERS
- popq %rbp
retq
.Larg1entryEnd:
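As a cross-check on the exit path (again a sketch inferred from the hunks
above, not authoritative): __xray_FunctionExit is only ever jumped into,
so the return address sitting below its frame belongs to the instrumented
function's caller; the 56-byte spill area plus that 8-byte return address
gives the .cfi_def_cfa_offset 64 seen in the diff, and the frame is packed
exactly:

	# Spill-area layout implied by the exit-trampoline hunks; offsets
	# are relative to %rsp after the subq:
	#   48(%rsp)  %rbp   (8 bytes, saved with movq instead of pushq)
	#   32(%rsp)  %xmm0  (16 bytes)
	#   16(%rsp)  %xmm1  (16 bytes)
	#    8(%rsp)  %rax   (integer return value)
	#    0(%rsp)  %rdx   (second integer return value)
	subq	$56, %rsp
	.cfi_def_cfa_offset 64	# 56-byte frame + 8-byte return address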