diff options
author | David Sehr <sehr@google.com> | 2013-03-05 00:02:23 +0000 |
---|---|---|
committer | David Sehr <sehr@google.com> | 2013-03-05 00:02:23 +0000 |
commit | 4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19 (patch) | |
tree | 0c1a536f13bd7189f92e7d89cd00ddd75b63bbb9 /llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | |
parent | 85707b28e865b7b5baef750418f587956035ffe5 (diff) | |
download | bcm5719-llvm-4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19.tar.gz bcm5719-llvm-4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19.zip |
The current X86 NOP padding uses one long NOP followed by the remainder in
one-byte NOPs. If the processor actually executes those NOPs, as it sometimes
does with aligned bundling, this can have a performance impact. From my
micro-benchmarks run on my one machine, a 15-byte NOP followed by twelve
one-byte NOPs is about 20% worse than a 15 followed by a 12. This patch
changes NOP emission to emit as many 15-byte (the maximum) as possible followed
by at most one shorter NOP.
llvm-svn: 176464
Diffstat (limited to 'llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp')
-rw-r--r-- | llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index acc90ecebaf..598ddee56d2 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -315,18 +315,18 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } - // Write an optimal sequence for the first 15 bytes. - const uint64_t OptimalCount = (Count < 16) ? Count : 15; - const uint64_t Prefixes = OptimalCount <= 10 ? 0 : OptimalCount - 10; - for (uint64_t i = 0, e = Prefixes; i != e; i++) - OW->Write8(0x66); - const uint64_t Rest = OptimalCount - Prefixes; - for (uint64_t i = 0, e = Rest; i != e; i++) - OW->Write8(Nops[Rest - 1][i]); - - // Finish with single byte nops. - for (uint64_t i = OptimalCount, e = Count; i != e; ++i) - OW->Write8(0x90); + // 15 is the longest single nop instruction. Emit as many 15-byte nops as + // needed, then emit a nop of the remaining length. + do { + const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15); + const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10; + for (uint8_t i = 0; i < Prefixes; i++) + OW->Write8(0x66); + const uint8_t Rest = ThisNopLength - Prefixes; + for (uint8_t i = 0; i < Rest; i++) + OW->Write8(Nops[Rest - 1][i]); + Count -= ThisNopLength; + } while (Count != 0); return true; } |