diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2015-11-19 11:44:35 +0000 | 
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-11-19 11:44:35 +0000 | 
| commit | b7b82bf33e0867c4946cc900b2ee94f6b4ad51f9 (patch) | |
| tree | aa1b7a57d81d28d1dc0bd80c7fc3db8bffcf24d0 /llvm | |
| parent | 26da45c2bb0c44499583b14be04bc506df439f0a (diff) | |
| download | bcm5719-llvm-b7b82bf33e0867c4946cc900b2ee94f6b4ad51f9.tar.gz bcm5719-llvm-b7b82bf33e0867c4946cc900b2ee94f6b4ad51f9.zip  | |
Alternative to long nops for X86 CPUs, by Andrey Turetsky
Make X86AsmBackend emit smarter multi-byte nop replacements, instead of a run of single-byte 0x90 nops, when aligning code for CPUs that don't support long nop instructions.
Differential Revision: http://reviews.llvm.org/D14178
llvm-svn: 253557
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 46 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86_nop.s | 8 | 
2 files changed, 33 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 00a5d3404ce..2eba084fc14 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -69,15 +69,19 @@ public:  class X86AsmBackend : public MCAsmBackend {    const StringRef CPU;    bool HasNopl; -  const uint64_t MaxNopLength; +  uint64_t MaxNopLength;  public: -  X86AsmBackend(const Target &T, StringRef CPU) -      : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) { +  X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {      HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&                CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&                CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&                CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&                CPU != "c3" && CPU != "c3-2"; +    // Max length of true long nop instruction is 15 bytes. +    // Max length of long nop replacement instruction is 7 bytes. +    // Taking into account SilverMont architecture features max length of nops +    // is reduced for it to achieve better performance. +    MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;    }    unsigned getNumFixupKinds() const override { @@ -279,7 +283,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {  /// bytes.  /// \return - true on success, false on failure  bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { -  static const uint8_t Nops[10][10] = { +  static const uint8_t TrueNops[10][10] = {      // nop      {0x90},      // xchg %ax,%ax @@ -302,17 +306,31 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {      {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},    }; -  // This CPU doesn't support long nops. If needed add more. 
-  // FIXME: Can we get this from the subtarget somehow? -  // FIXME: We could generated something better than plain 0x90. -  if (!HasNopl) { -    for (uint64_t i = 0; i < Count; ++i) -      OW->write8(0x90); -    return true; -  } +  // Alternative nop instructions for CPUs which don't support long nops. +  static const uint8_t AltNops[7][10] = { +      // nop +      {0x90}, +      // xchg %ax,%ax +      {0x66, 0x90}, +      // lea 0x0(%esi),%esi +      {0x8d, 0x76, 0x00}, +      // lea 0x0(%esi),%esi +      {0x8d, 0x74, 0x26, 0x00}, +      // nop + lea 0x0(%esi),%esi +      {0x90, 0x8d, 0x74, 0x26, 0x00}, +      // lea 0x0(%esi),%esi +      {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00 }, +      // lea 0x0(%esi),%esi +      {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00}, +  }; + +  // Select the right NOP table. +  // FIXME: Can we get if CPU supports long nops from the subtarget somehow? +  const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops; +  assert(HasNopl || MaxNopLength <= 7); -  // 15 is the longest single nop instruction.  Emit as many 15-byte nops as -  // needed, then emit a nop of the remaining length. +  // Emit as many largest nops as needed, then emit a nop of the remaining +  // length.    do {      const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);      const uint8_t Prefixes = ThisNopLength <= 10 ? 
0 : ThisNopLength - 10; diff --git a/llvm/test/MC/X86/x86_nop.s b/llvm/test/MC/X86/x86_nop.s index 572487bfdac..feac4e4cf03 100644 --- a/llvm/test/MC/X86/x86_nop.s +++ b/llvm/test/MC/X86/x86_nop.s @@ -22,13 +22,7 @@ inc %eax  inc %eax  // CHECK: 0:	40                                           	incl	%eax -// CHECK: 1:	90                                           	nop -// CHECK: 2:	90                                           	nop -// CHECK: 3:	90                                           	nop -// CHECK: 4:	90                                           	nop -// CHECK: 5:	90                                           	nop -// CHECK: 6:	90                                           	nop -// CHECK: 7:	90                                           	nop +// CHECK: 1:	8d b4 26 00 00 00 00                            leal (%esi), %esi  // CHECK: 8:	40                                           	incl	%eax  | 

