summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/MC
diff options
context:
space:
mode:
author: David Sehr <sehr@google.com> 2013-03-05 00:02:23 +0000
committer: David Sehr <sehr@google.com> 2013-03-05 00:02:23 +0000
commit: 4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19 (patch)
tree: 0c1a536f13bd7189f92e7d89cd00ddd75b63bbb9 /llvm/test/MC
parent: 85707b28e865b7b5baef750418f587956035ffe5 (diff)
download: bcm5719-llvm-4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19.tar.gz
download: bcm5719-llvm-4c8979cd4d19814b8de2e45bd4cc42a5d99ebe19.zip
The current X86 NOP padding uses one long NOP followed by the remainder in one-byte NOPs.
If the processor actually executes those NOPs, as it sometimes does with aligned bundling, this can have a performance impact. From my micro-benchmarks, run on a single machine, a 15-byte NOP followed by twelve one-byte NOPs is about 20% worse than a 15-byte NOP followed by a single 12-byte NOP. This patch changes NOP emission to emit as many 15-byte NOPs (the maximum length) as possible, followed by at most one shorter NOP.
llvm-svn: 176464
Diffstat (limited to 'llvm/test/MC')
-rw-r--r-- llvm/test/MC/X86/AlignedBundling/long-nop-pad.s 27
1 file changed, 27 insertions, 0 deletions
diff --git a/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s
new file mode 100644
index 00000000000..ea33e2889b9
--- /dev/null
+++ b/llvm/test/MC/X86/AlignedBundling/long-nop-pad.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+# RUN: | llvm-objdump -disassemble -no-show-raw-insn - | FileCheck %s
+
+# Test that bundle padding is emitted as long NOPs (15-byte NOPs plus at most one shorter NOP) where possible.
+
+ .text
+foo:
+ .bundle_align_mode 5
+
+# This callq instruction is 5 bytes long; aligned to the bundle end, it is expected at offset 0x1b.
+ .bundle_lock align_to_end
+ callq bar
+ .bundle_unlock
+# To align this group to a bundle end, we need 27 bytes of padding: a 15-byte NOP and a 12-byte NOP.
+# CHECK: 0: nop
+# CHECK-NEXT: f: nop
+# CHECK-NEXT: 1b: callq
+
+# This push instruction is 1 byte long; aligned to the bundle end, it is expected at offset 0x3f.
+ .bundle_lock align_to_end
+ push %rax
+ .bundle_unlock
+# To align this group to a bundle end, we need 31 bytes of padding: two 15-byte NOPs and a 1-byte NOP.
+# CHECK: 20: nop
+# CHECK-NEXT: 2f: nop
+# CHECK-NEXT: 3e: nop
+# CHECK-NEXT: 3f: pushq
OpenPOWER on IntegriCloud