summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFangrui Song <maskray@google.com>2019-05-10 05:51:00 +0000
committerFangrui Song <maskray@google.com>2019-05-10 05:51:00 +0000
commit6cdd68e386d98c0535e9f977849f376e1e6f55a2 (patch)
treed9b999eafd47e2ad7cc7cc4d313e58691ca00d12
parentc39a243da651991c5037dee7bae2d6e7d8c37196 (diff)
downloadbcm5719-llvm-6cdd68e386d98c0535e9f977849f376e1e6f55a2.tar.gz
bcm5719-llvm-6cdd68e386d98c0535e9f977849f376e1e6f55a2.zip
[PPC64] Define getThunkSectionSpacing() based on the range of R_PPC64_REL24
Suggested by Sean Fertile and Peter Smith. Thunk section spacing decreases the total number of thunks. I measured a decrease of 1% or less in some large programs, with no perceivable slowdown in link time. Override getThunkSectionSpacing() to enable it. 0x2000000 is the farthest point R_PPC64_REL24 can reach. I tried several numbers and found 0x2000000 works the best. Numbers near 0x2000000 work as well but let's just use the simpler number. As demonstrated by the updated tests, this essentially changes placement of most thunks to the end of the output section. We leverage this property to fix PR40740 reported by Alfredo Dal'Ava Júnior: The output section .init consists of input sections from several object files (crti.o crtbegin.o crtend.o crtn.o). Sections other than the last one do not have a terminator. With this patch, we create the thunk after the last .init input section and thus fix the issue. This is not foolproof but works quite well for such sections (with no terminator) in practice. Reviewed By: ruiu, sfertile Differential Revision: https://reviews.llvm.org/D61720 llvm-svn: 360405
-rw-r--r--lld/ELF/Arch/PPC64.cpp9
-rw-r--r--lld/test/ELF/ppc64-bsymbolic-toc-restore.s4
-rw-r--r--lld/test/ELF/ppc64-call-reach.s15
-rw-r--r--lld/test/ELF/ppc64-ifunc.s28
-rw-r--r--lld/test/ELF/ppc64-local-dynamic.s2
-rw-r--r--lld/test/ELF/ppc64-long-branch-init.s44
-rw-r--r--lld/test/ELF/ppc64-plt-stub.s11
-rw-r--r--lld/test/ELF/ppc64-toc-restore-recursive-call.s11
-rw-r--r--lld/test/ELF/ppc64-toc-restore.s36
9 files changed, 101 insertions, 59 deletions
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 12b4cec1a39..bc199fff45a 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -200,6 +200,7 @@ public:
void writeGotHeader(uint8_t *Buf) const override;
bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
uint64_t BranchAddr, const Symbol &S) const override;
+ uint32_t getThunkSectionSpacing() const override;
bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override;
RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const override;
@@ -885,6 +886,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
getPPC64GlobalEntryToLocalEntryOffset(S.StOther));
}
+uint32_t PPC64::getThunkSectionSpacing() const {
+ // See comment in Arch/ARM.cpp for a more detailed explanation of
+ // getThunkSectionSpacing(). For PPC64 we pick the constant here based on
+ // R_PPC64_REL24, which is used by unconditional branch instructions.
+ // 0x2000000 = (1 << 24-1) * 4
+ return 0x2000000;
+}
+
bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
int64_t Offset = Dst - Src;
if (Type == R_PPC64_REL14)
diff --git a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
index b7d9edd45d4..d467d22ff7b 100644
--- a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
+++ b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s
@@ -53,7 +53,7 @@ caller:
# CHECK-LABEL: caller
# CHECK: bl .+44
# CHECK-NEXT: mr 31, 3
-# CHECK-NEXT: bl .-48
+# CHECK-NEXT: bl .+44
# CHECK-NEXT: ld 2, 24(1)
# CHECK-NEXT: add 3, 3, 31
# CHECK-NEXT: addi 1, 1, 32
@@ -63,6 +63,6 @@ caller:
# CHECK-EMPTY:
# CHECK-NEXT: def:
# CHECK-NEXT: addis 2, 12, 2
-# CHECK-NEXT: addi 2, 2, -32636
+# CHECK-NEXT: addi 2, 2, -32616
# CHECK-NEXT: li 3, 55
# CHECK-NEXT: blr
diff --git a/lld/test/ELF/ppc64-call-reach.s b/lld/test/ELF/ppc64-call-reach.s
index 980df2653d5..8a32af07acf 100644
--- a/lld/test/ELF/ppc64-call-reach.s
+++ b/lld/test/ELF/ppc64-call-reach.s
@@ -65,27 +65,24 @@ test:
# NEGOFFSET: 10010014: bl .-33554432
# NEGOFFSET: 10010024: b .+33554432
+# THUNK-LABEL: test:
+# THUNK: 10010014: bl .+20
+# THUNK: 10010024: b .+20
+
# .branch_lt[0]
# THUNK-LABEL: __long_branch_callee:
-# THUNK-NEXT: 10010000: addis 12, 2, 1
+# THUNK-NEXT: 10010028: addis 12, 2, 1
# THUNK-NEXT: ld 12, -32768(12)
# THUNK-NEXT: mtctr 12
# THUNK-NEXT: bctr
# .branch_lt[1]
# THUNK-LABEL: __long_branch_tail_callee:
-# THUNK-NEXT: 10010010: addis 12, 2, 1
+# THUNK-NEXT: 10010038: addis 12, 2, 1
# THUNK-NEXT: ld 12, -32760(12)
# THUNK-NEXT: mtctr 12
# THUNK-NEXT: bctr
-# Each call now branches to a thunk, and although it is printed as positive
-# the offset is interpreted as a signed 26 bit value so 67108812 is actually
-# -52.
-# THUNK-LABEL: test:
-# THUNK: 10010034: bl .-52
-# THUNK: 10010044: b .+67108812
-
# The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768.
# Name Type Address Off Size
# BRANCHLT: .got PROGBITS 0000000010020000 020000 000008
diff --git a/lld/test/ELF/ppc64-ifunc.s b/lld/test/ELF/ppc64-ifunc.s
index 4bf50b98db1..32e317f3c05 100644
--- a/lld/test/ELF/ppc64-ifunc.s
+++ b/lld/test/ELF/ppc64-ifunc.s
@@ -15,11 +15,21 @@
# RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s
# NM-DAG: 0000000010028000 d .TOC.
-# NM-DAG: 0000000010010028 T ifunc
-# NM-DAG: 000000001001002c T ifunc2
+# NM-DAG: 0000000010010000 T ifunc
+# NM-DAG: 0000000010010004 T ifunc2
# SECTIONS: .plt NOBITS 0000000010030000
+# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16
+# __plt_ifunc2 - . = 0x10010044 - 0x10010018 = 28
+# CHECK: _start:
+# CHECK-NEXT: addis 2, 12, 1
+# CHECK-NEXT: addi 2, 2, 32760
+# CHECK-NEXT: 10010010: bl .+16
+# CHECK-NEXT: ld 2, 24(1)
+# CHECK-NEXT: 10010018: bl .+28
+# CHECK-NEXT: ld 2, 24(1)
+
# .plt[0] - .TOC. = 0x10030000 - 0x10028000 = (1<<16) - 32768
# CHECK: __plt_ifunc:
# CHECK-NEXT: std 2, 24(1)
@@ -36,19 +46,9 @@
# CHECK-NEXT: mtctr 12
# CHECK-NEXT: bctr
-# __plt_ifunc - . = 0x10010000 - 0x10010038 = -56
-# __plt_ifunc2 - . = 0x10010014 - 0x10010040 = -44
-# CHECK: _start:
-# CHECK-NEXT: addis 2, 12, 1
-# CHECK-NEXT: addi 2, 2, 32720
-# CHECK-NEXT: 10010038: bl .-56
-# CHECK-NEXT: ld 2, 24(1)
-# CHECK-NEXT: 10010040: bl .-44
-# CHECK-NEXT: ld 2, 24(1)
-
# Check that we emit 2 R_PPC64_IRELATIVE.
-# DYNREL: R_PPC64_IRELATIVE 10010028
-# DYNREL: R_PPC64_IRELATIVE 1001002c
+# DYNREL: R_PPC64_IRELATIVE 10010000
+# DYNREL: R_PPC64_IRELATIVE 10010004
.type ifunc STT_GNU_IFUNC
.globl ifunc
diff --git a/lld/test/ELF/ppc64-local-dynamic.s b/lld/test/ELF/ppc64-local-dynamic.s
index 8a23863f67d..87e33b784b8 100644
--- a/lld/test/ELF/ppc64-local-dynamic.s
+++ b/lld/test/ELF/ppc64-local-dynamic.s
@@ -113,7 +113,7 @@ k:
// Dis: test:
// Dis: addis 3, 2, 0
// Dis-NEXT: addi 3, 3, -32760
-// Dis-NEXT: bl .-60
+// Dis-NEXT: bl .+60
// Dis-NEXT: ld 2, 24(1)
// Dis-NEXT: addis 3, 3, 0
// Dis-NEXT: lwa 3, -32768(3)
diff --git a/lld/test/ELF/ppc64-long-branch-init.s b/lld/test/ELF/ppc64-long-branch-init.s
new file mode 100644
index 00000000000..cc2f73c6666
--- /dev/null
+++ b/lld/test/ELF/ppc64-long-branch-init.s
@@ -0,0 +1,44 @@
+# REQUIRES: ppc
+
+# RUN: llvm-mc -filetype=obj -triple=powerpc64-pc-freebsd13.0 %s -o %t.o
+# RUN: ld.lld %t.o -o %t
+# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
+
+## .init consists of sections from several object files. Sections other than the
+## last one do not have a terminator. Check we do not create a long branch stub
+## in the middle.
+## We currently use thunk section spacing to ensure the stub is in the end. This
+## is not foolproof but good enough to not break in practice.
+
+# CHECK: Disassembly of section .init:
+# CHECK-EMPTY:
+# CHECK-LABEL: _init:
+# CHECK: blr
+# CHECK-EMPTY:
+# CHECK-LABEL: __long_branch_foo:
+
+.globl foo
+foo:
+ .space 0x2000000
+ blr
+
+.section .init,"ax",@progbits,unique,0
+.globl _init
+_init:
+ stdu 1, -48(1)
+ mflr 0
+ std 0, 64(1)
+
+.section .init,"ax",@progbits,unique,1
+ bl foo
+ nop
+
+.section .init,"ax",@progbits,unique,2
+ bl foo
+ nop
+
+.section .init,"ax",@progbits,unique,3
+ ld 1, 0(1)
+ ld 0, 16(1)
+ mtlr 0
+ blr
diff --git a/lld/test/ELF/ppc64-plt-stub.s b/lld/test/ELF/ppc64-plt-stub.s
index f06ea90a03c..b2de161cebf 100644
--- a/lld/test/ELF/ppc64-plt-stub.s
+++ b/lld/test/ELF/ppc64-plt-stub.s
@@ -4,17 +4,20 @@
// RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
// RUN: ld.lld -shared %t2.o -o %t2.so
// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
// RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o
// RUN: ld.lld -shared %t2.o -o %t2.so
// RUN: ld.lld %t.o %t2.so -o %t
-// RUN: llvm-objdump -d %t | FileCheck %s
+// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s
// CHECK: Disassembly of section .text:
// CHECK-EMPTY:
-// CHECK-NEXT: __plt_foo:
+// CHECK-NEXT: _start:
+// CHECK: 10010008: bl .+16
+
+// CHECK-LABEL: 0000000010010018 __plt_foo:
// CHECK-NEXT: std 2, 24(1)
// CHECK-NEXT: addis 12, 2, 0
// CHECK-NEXT: ld 12, 32560(12)
@@ -22,8 +25,6 @@
// CHECK-NEXT: bctr
-// CHECK: _start:
-// CHECK: bl .-40
.text
.abiversion 2
.globl _start
diff --git a/lld/test/ELF/ppc64-toc-restore-recursive-call.s b/lld/test/ELF/ppc64-toc-restore-recursive-call.s
index 538b12c7c90..756a058cc56 100644
--- a/lld/test/ELF/ppc64-toc-restore-recursive-call.s
+++ b/lld/test/ELF/ppc64-toc-restore-recursive-call.s
@@ -14,12 +14,11 @@
# for recursive calls as well as keeps the logic for recursive calls consistent
# with non-recursive calls.
-# CHECK-LABEL: __plt_recursive_func:
-# CHECK-NEXT: 10000:
-# CHECK-LABEL: recursive_func
-# CHECK-NEXT: 10014:
-# CHECK: 1003c: bl .-60
-# CHECK-NEXT: 10040: ld 2, 24(1)
+# CHECK-LABEL: 0000000000010000 recursive_func:
+# CHECK: 10028: bl .+32
+# CHECK-NEXT: ld 2, 24(1)
+
+# CHECK-LABEL: 0000000000010048 __plt_recursive_func:
.abiversion 2
.section ".text"
diff --git a/lld/test/ELF/ppc64-toc-restore.s b/lld/test/ELF/ppc64-toc-restore.s
index 5f76f68b4d9..d65bef847a7 100644
--- a/lld/test/ELF/ppc64-toc-restore.s
+++ b/lld/test/ELF/ppc64-toc-restore.s
@@ -28,17 +28,11 @@ _start:
bl foo
nop
bl bar_local
-
-
-// CHECK: Disassembly of section .text:
+// CHECK-LABEL: _start:
+// CHECK-NEXT: 10010008: bl .+64
+// CHECK-NEXT: 1001000c: ld 2, 24(1)
+// CHECK-NEXT: 10010010: bl .-16
// CHECK-EMPTY:
-// CHECK: _start:
-// CHECK: 1001001c: bl .-28
-// CHECK-NOT: 10010020: nop
-// CHECK: 10010020: ld 2, 24(1)
-// CHECK: 10010024: bl .-16
-// CHECK-NOT: 10010028: nop
-// CHECK-NOT: 10010028: ld 2, 24(1)
# Calling a function in another object file which will have same
# TOC base does not need a nop. If nop present, do not rewrite to
@@ -48,26 +42,24 @@ _diff_object:
bl foo_not_shared
bl foo_not_shared
nop
-
-// CHECK: _diff_object:
-// CHECK-NEXT: 10010028: bl .+24
-// CHECK-NEXT: 1001002c: bl .+20
-// CHECK-NEXT: 10010030: nop
+// CHECK-LABEL: _diff_object:
+// CHECK-NEXT: 10010014: bl .+28
+// CHECK-NEXT: 10010018: bl .+24
+// CHECK-NEXT: 1001001c: nop
# Branching to a local function does not need a nop
.global noretbranch
noretbranch:
b bar_local
-// CHECK: noretbranch:
-// CHECK: 10010034: b .+67108832
-// CHECK-NOT: 10010038: nop
-// CHECK-NOT: 1001003c: ld 2, 24(1)
+// CHECK-LABEL: noretbranch:
+// CHECK: 10010020: b .+67108832
+// CHECK-EMPTY:
// This should come last to check the end-of-buffer condition.
.global last
last:
bl foo
nop
-// CHECK: last:
-// CHECK: 10010038: bl .-56
-// CHECK-NEXT: 1001003c: ld 2, 24(1)
+// CHECK-LABEL: last:
+// CHECK-NEXT: 10010024: bl .+36
+// CHECK-NEXT: 10010028: ld 2, 24(1)
OpenPOWER on IntegriCloud