summaryrefslogtreecommitdiffstats
path: root/lld/ELF
diff options
context:
space:
mode:
Diffstat (limited to 'lld/ELF')
-rw-r--r--lld/ELF/Arch/PPC64.cpp115
-rw-r--r--lld/ELF/Arch/X86_64.cpp10
-rw-r--r--lld/ELF/Config.h1
-rw-r--r--lld/ELF/Driver.cpp4
-rw-r--r--lld/ELF/InputSection.cpp6
-rw-r--r--lld/ELF/Options.td6
-rw-r--r--lld/ELF/Target.cpp5
-rw-r--r--lld/ELF/Target.h11
8 files changed, 146 insertions, 12 deletions
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index eb01955e093..b6408af0720 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -120,6 +120,9 @@ public:
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
+
+ bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const override;
};
} // namespace
@@ -213,6 +216,8 @@ PPC64::PPC64() {
TlsGotRel = R_PPC64_TPREL64;
+ NeedsMoreStackNonSplit = false;
+
// We need 64K pages (at least under glibc/Linux, the loader won't
// set different permissions on a finer granularity than that).
DefaultMaxPageSize = 65536;
@@ -761,7 +766,115 @@ void PPC64::relaxTlsGdToIe(uint8_t *Loc, RelType Type, uint64_t Val) const {
}
}
+// The prologue for a split-stack function is expected to look roughly
+// like this:
+// .Lglobal_entry_point:
+// # TOC pointer initalization.
+// ...
+// .Llocal_entry_point:
+// # load the __private_ss member of the threads tcbhead.
+// ld r0,-0x7000-64(r13)
+// # subtract the functions stack size from the stack pointer.
+// addis r12, r1, ha(-stack-frame size)
+// addi r12, r12, l(-stack-frame size)
+// # compare needed to actual and branch to allocate_more_stack if more
+// # space is needed, otherwise fallthrough to 'normal' function body.
+// cmpld cr7,r12,r0
+// blt- .Lallocate_more_stack
+//
+// -) The allocate_more_stack block might be placed after the split-stack
+// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`
+// instead.
+// -) If either the addis or addi is not needed due to the stack size being
+// smaller then 32K or a multiple of 64K they will be replaced with a nop,
+// but there will always be 2 instructions the linker can overwrite for the
+// adjusted stack size.
+//
+// The linkers job here is to increase the stack size used in the addis/addi
+// pair by split-stack-size-adjust.
+// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)
+// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)
+bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const {
+ // If the caller has a global entry point adjust the buffer past it. The start
+ // of the split-stack prologue will be at the local entry point.
+ Loc += getPPC64GlobalEntryToLocalEntryOffset(StOther);
+
+ // At the very least we expect to see a load of some split-stack data from the
+ // tcb, and 2 instructions that calculate the ending stack address this
+ // function will require. If there is not enough room for at least 3
+ // instructions it can't be a split-stack prologue.
+ if (Loc + 12 >= End)
+ return false;
+
+ // First instruction must be `ld r0, -0x7000-64(r13)`
+ if (read32(Loc) != 0xe80d8fc0)
+ return false;
+
+ int16_t HiImm = 0;
+ int16_t LoImm = 0;
+ // First instruction can be either an addis if the frame size is larger then
+ // 32K, or an addi if the size is less then 32K.
+ int32_t FirstInstr = read32(Loc + 4);
+ if (getPrimaryOpCode(FirstInstr) == 15) {
+ HiImm = FirstInstr & 0xFFFF;
+ } else if (getPrimaryOpCode(FirstInstr) == 14) {
+ LoImm = FirstInstr & 0xFFFF;
+ } else {
+ return false;
+ }
+
+ // Second instruction is either an addi or a nop. If the first instruction was
+ // an addi then LoImm is set and the second instruction must be a nop.
+ uint32_t SecondInstr = read32(Loc + 8);
+ if (!LoImm && getPrimaryOpCode(SecondInstr) == 14) {
+ LoImm = SecondInstr & 0xFFFF;
+ } else if (SecondInstr != 0x60000000) {
+ return false;
+ }
+
+ // The register operands of the first instruction should be the stack-pointer
+ // (r1) as the input (RA) and r12 as the output (RT). If the second
+ // instruction is not a nop, then it should use r12 as both input and output.
+ auto CheckRegOperands =
+ [](uint32_t Instr, uint8_t ExpectedRT, uint8_t ExpectedRA) {
+ return ((Instr & 0x3E00000) >> 21 == ExpectedRT) &&
+ ((Instr & 0x1F0000) >> 16 == ExpectedRA);
+ };
+ if (!CheckRegOperands(FirstInstr, 12, 1))
+ return false;
+ if (SecondInstr != 0x60000000 && !CheckRegOperands(SecondInstr, 12, 12))
+ return false;
+
+ int32_t StackFrameSize = (HiImm * 65536) + LoImm;
+ // Check that the adjusted size doesn't overflow what we can represent with 2
+ // instructions.
+ if (StackFrameSize < -2147483648 + Config->SplitStackAdjustSize) {
+ error(getErrorLocation(Loc) + "split-stack prologue adjustment overflows");
+ return false;
+ }
+
+ int32_t AdjustedStackFrameSize =
+ StackFrameSize - Config->SplitStackAdjustSize;
+
+ LoImm = AdjustedStackFrameSize & 0xFFFF;
+ HiImm = (AdjustedStackFrameSize + 0x8000) >> 16;
+ if (HiImm) {
+ write32(Loc + 4, 0x3D810000 | (uint16_t)HiImm);
+ // If the low immediate is zero the second instruction will be a nop.
+ SecondInstr =
+ LoImm ? 0x398C0000 | (uint16_t)LoImm : 0x60000000;
+ write32(Loc + 8, SecondInstr);
+ } else {
+ // addi r12, r1, imm
+ write32(Loc + 4, (0x39810000) | (uint16_t)LoImm);
+ write32(Loc + 8, 0x60000000);
+ }
+
+ return true;
+}
+
TargetInfo *elf::getPPC64TargetInfo() {
static PPC64 Target;
return &Target;
-}
+ }
diff --git a/lld/ELF/Arch/X86_64.cpp b/lld/ELF/Arch/X86_64.cpp
index 960bf782aa7..6421ec4e0c3 100644
--- a/lld/ELF/Arch/X86_64.cpp
+++ b/lld/ELF/Arch/X86_64.cpp
@@ -43,8 +43,8 @@ public:
void relaxTlsGdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsIeToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
void relaxTlsLdToLe(uint8_t *Loc, RelType Type, uint64_t Val) const override;
- bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
- uint8_t *End) const override;
+ bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const override;
private:
void relaxGotNoPic(uint8_t *Loc, uint64_t Val, uint8_t Op,
@@ -482,7 +482,8 @@ namespace {
// B) Or a load of a stack pointer offset with an lea to r10 or r11.
template <>
bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
- uint8_t *End) const {
+ uint8_t *End,
+ uint8_t StOther) const {
if (Loc + 8 >= End)
return false;
@@ -509,7 +510,8 @@ bool X86_64<ELF64LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
template <>
bool X86_64<ELF32LE>::adjustPrologueForCrossSplitStack(uint8_t *Loc,
- uint8_t *End) const {
+ uint8_t *End,
+ uint8_t StOther) const {
llvm_unreachable("Target doesn't support split stacks.");
}
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index f9fc10c28b9..de751afd002 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -217,6 +217,7 @@ struct Configuration {
unsigned LTOO;
unsigned Optimize;
unsigned ThinLTOJobs;
+ int32_t SplitStackAdjustSize;
// The following config options do not directly correspond to any
// particualr command line options.
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index 1878d950050..147675e4826 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -829,6 +829,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
Config->SingleRoRx = Args.hasArg(OPT_no_rosegment);
Config->SoName = Args.getLastArgValue(OPT_soname);
Config->SortSection = getSortSection(Args);
+ Config->SplitStackAdjustSize = args::getInteger(Args, OPT_split_stack_adjust_size, 16384);
Config->Strip = getStrip(Args);
Config->Sysroot = Args.getLastArgValue(OPT_sysroot);
Config->Target1Rel = Args.hasFlag(OPT_target1_rel, OPT_target1_abs, false);
@@ -901,6 +902,9 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
if (Config->ThinLTOJobs == 0)
error("--thinlto-jobs: number of threads must be > 0");
+ if (Config->SplitStackAdjustSize < 0)
+ error("--split-stack-adjust-size: size must be >= 0");
+
// Parse ELF{32,64}{LE,BE} and CPU type.
if (auto *Arg = Args.getLastArg(OPT_m)) {
StringRef S = Arg->getValue();
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 9da4af60636..15fa2dcf718 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1009,7 +1009,7 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
if (Defined *F = getEnclosingFunction<ELFT>(Rel.Offset)) {
Prologues.insert(F);
if (Target->adjustPrologueForCrossSplitStack(Buf + getOffset(F->Value),
- End))
+ End, F->StOther))
continue;
if (!getFile<ELFT>()->SomeNoSplitStack)
error(lld::toString(this) + ": " + F->getName() +
@@ -1017,7 +1017,9 @@ void InputSectionBase::adjustSplitStackFunctionPrologues(uint8_t *Buf,
" (without -fsplit-stack), but couldn't adjust its prologue");
}
}
- switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
+
+ if (Target->NeedsMoreStackNonSplit)
+ switchMorestackCallsToMorestackNonSplit(Prologues, MorestackCalls);
}
template <class ELFT> void InputSection::writeTo(uint8_t *Buf) {
diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 67d3dc60860..1ff1abe0bf2 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -42,6 +42,12 @@ defm compress_debug_sections:
defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
+defm split_stack_adjust_size
+ : Eq<"split-stack-adjust-size",
+ "Specify adjustment to stack size when a split-stack function calls a "
+ "non-split-stack function">,
+ MetaVarName<"<value>">;
+
defm library_path:
Eq<"library-path", "Add a directory to the library search path">, MetaVarName<"<dir>">;
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
index 0c8c940400e..4db7939b58b 100644
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -132,12 +132,11 @@ bool TargetInfo::needsThunk(RelExpr Expr, RelType Type, const InputFile *File,
return false;
}
-bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc,
- uint8_t *End) const {
+bool TargetInfo::adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const {
llvm_unreachable("Target doesn't support split stacks.");
}
-
bool TargetInfo::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const {
return true;
}
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
index 1764963d614..4f310e364e3 100644
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -69,8 +69,10 @@ public:
// The function with a prologue starting at Loc was compiled with
// -fsplit-stack and it calls a function compiled without. Adjust the prologue
// to do the right thing. See https://gcc.gnu.org/wiki/SplitStacks.
- virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc,
- uint8_t *End) const;
+ // The symbols st_other flags are needed on PowerPC64 for determining the
+ // offset to the split-stack prologue.
+ virtual bool adjustPrologueForCrossSplitStack(uint8_t *Loc, uint8_t *End,
+ uint8_t StOther) const;
// Return true if we can reach Dst from Src with Relocation RelocType
virtual bool inBranchRange(RelType Type, uint64_t Src,
@@ -130,6 +132,11 @@ public:
// executable OutputSections.
uint32_t TrapInstr = 0;
+ // If a target needs to rewrite calls to __morestack to instead call
+ // __morestack_non_split when a split-stack enabled caller calls a
+ // non-split-stack callee this will return true. Otherwise returns false.
+ bool NeedsMoreStackNonSplit = true;
+
virtual RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data,
RelExpr Expr) const;
virtual void relaxGot(uint8_t *Loc, uint64_t Val) const;
OpenPOWER on IntegriCloud