diff options
| -rw-r--r-- | lld/ELF/Target.cpp | 60 | ||||
| -rw-r--r-- | lld/test/ELF/tls-opt.s | 8 |
2 files changed, 36 insertions, 32 deletions
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp index 61a843b170f..2db522503c5 100644 --- a/lld/ELF/Target.cpp +++ b/lld/ELF/Target.cpp @@ -677,34 +677,40 @@ void X86_64TargetInfo::relaxTlsGdToIe(uint8_t *Loc, uint32_t Type, // R_X86_64_TPOFF32 so that it does not use GOT. void X86_64TargetInfo::relaxTlsIeToLe(uint8_t *Loc, uint32_t Type, uint64_t Val) const { - // Ulrich's document section 6.5 says that @gottpoff(%rip) must be - // used in MOVQ or ADDQ instructions only. - // "MOVQ foo@GOTTPOFF(%RIP), %REG" is transformed to "MOVQ $foo, %REG". - // "ADDQ foo@GOTTPOFF(%RIP), %REG" is transformed to "LEAQ foo(%REG), %REG" - // (if the register is not RSP/R12) or "ADDQ $foo, %RSP". - // Opcodes info can be found at http://ref.x86asm.net/coder64.html#x48. - uint8_t *Prefix = Loc - 3; - uint8_t *Inst = Loc - 2; - uint8_t *RegSlot = Loc - 1; + uint8_t *Inst = Loc - 3; uint8_t Reg = Loc[-1] >> 3; - bool IsMov = *Inst == 0x8b; - bool RspAdd = !IsMov && Reg == 4; - - // r12 and rsp registers requires special handling. - // Problem is that for other registers, for example leaq 0xXXXXXXXX(%r11),%r11 - // result out is 7 bytes: 4d 8d 9b XX XX XX XX, - // but leaq 0xXXXXXXXX(%r12),%r12 is 8 bytes: 4d 8d a4 24 XX XX XX XX. - // The same true for rsp. So we convert to addq for them, saving 1 byte that - // we dont have. - if (RspAdd) - *Inst = 0x81; - else - *Inst = IsMov ? 0xc7 : 0x8d; - if (*Prefix == 0x4c) - *Prefix = (IsMov || RspAdd) ? 0x49 : 0x4d; - *RegSlot = (IsMov || RspAdd) ? (0xc0 | Reg) : (0x80 | Reg | (Reg << 3)); - // The original code used a pc relative relocation and so we have to - // compensate for the -4 in had in the addend. + uint8_t *RegSlot = Loc - 1; + + // Note that LEA with RSP or R12 is converted to ADD instead of LEA + // because LEA with these registers needs 4 bytes to encode and thus + // wouldn't fit the space. 
+ + if (memcmp(Inst, "\x48\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp" + memcpy(Inst, "\x48\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03\x25", 3) == 0) { + // "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12" + memcpy(Inst, "\x49\x81\xc4", 3); + } else if (memcmp(Inst, "\x4c\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]" + memcpy(Inst, "\x4d\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x48\x03", 2) == 0) { + // "addq foo@gottpoff(%rip),%reg" -> "leaq foo(%reg),%reg" + memcpy(Inst, "\x48\x8d", 2); + *RegSlot = 0x80 | (Reg << 3) | Reg; + } else if (memcmp(Inst, "\x4c\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]" + memcpy(Inst, "\x49\xc7", 2); + *RegSlot = 0xc0 | Reg; + } else if (memcmp(Inst, "\x48\x8b", 2) == 0) { + // "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg" + memcpy(Inst, "\x48\xc7", 2); + *RegSlot = 0xc0 | Reg; + } + + // The original code used a PC relative relocation. + // Need to compensate for the -4 it had in the addend. 
relocateOne(Loc, R_X86_64_TPOFF32, Val + 4); } diff --git a/lld/test/ELF/tls-opt.s b/lld/test/ELF/tls-opt.s index c9b07a42eea..f9b2942e37d 100644 --- a/lld/test/ELF/tls-opt.s +++ b/lld/test/ELF/tls-opt.s @@ -20,11 +20,9 @@ // DISASM-NEXT: 1103f: 4d 8d bf fc ff ff ff leaq -4(%r15), %r15 // DISASM-NEXT: 11046: 48 81 c4 fc ff ff ff addq $-4, %rsp // DISASM-NEXT: 1104d: 49 81 c4 fc ff ff ff addq $-4, %r12 -// Corrupred output: -// DISASM-NEXT: 11054: 48 8d 80 f8 ff ff ff leaq -8(%rax), %rax -// DISASM-NEXT: 1105b: 48 d1 81 c4 fc ff ff rolq -828(%rcx) -// DISASM-NEXT: 11062: ff 48 d1 decl -47(%rax) -// DISASM-NEXT: 11065: 81 c4 fc ff ff ff addl $4294967292, %esp +// DISASM-NEXT: 11054: 48 87 05 f8 ff ff ff xchgq %rax, -8(%rip) +// DISASM-NEXT: 1105b: 48 d1 24 25 fc ff ff ff shlq -4 +// DISASM-NEXT: 11063: 48 d1 04 25 fc ff ff ff rolq -4 // LD to LE: // DISASM-NEXT: 1106b: 66 66 66 64 48 8b 04 25 00 00 00 00 movq %fs:0, %rax // DISASM-NEXT: 11077: 48 8d 88 f8 ff ff ff leaq -8(%rax), %rcx |

