summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/ARM/adv-copy-opt.ll
diff options
context:
space:
mode:
authorMatthias Braun <matze@braunis.de>2015-07-17 01:44:31 +0000
committerMatthias Braun <matze@braunis.de>2015-07-17 01:44:31 +0000
commit2d8315f8066bbce201e9b2c28b7d24915dcbe5f0 (patch)
tree6de810000cfdd7af9f988349786d0d911cf9932f /llvm/test/CodeGen/ARM/adv-copy-opt.ll
parentfb2398d0c43405a6b654c80560e38fb3ccd134b9 (diff)
downloadbcm5719-llvm-2d8315f8066bbce201e9b2c28b7d24915dcbe5f0.tar.gz
bcm5719-llvm-2d8315f8066bbce201e9b2c28b7d24915dcbe5f0.zip
ARM: Enable MachineScheduler and disable PostRAScheduler for swift.
This is mostly done to disable the PostRAScheduler, which optimizes for instruction latencies, which isn't a good fit for out-of-order architectures. This also allows leaving out the itinerary table in swift in favor of the SchedModel ones. This change leads to performance improvements/regressions by as much as 10% in some benchmarks; in fact we lose 0.4% performance over the llvm-testsuite for reasons that appear to be unknown or out of the compiler's control. rdar://20803802 documents the investigation of these effects. While it is probably a good idea to perform the same switch for the other ARM out-of-order CPUs, I limited this change to swift as I cannot perform the benchmark verification on the other CPUs. Differential Revision: http://reviews.llvm.org/D10513 llvm-svn: 242500
Diffstat (limited to 'llvm/test/CodeGen/ARM/adv-copy-opt.ll')
-rw-r--r--llvm/test/CodeGen/ARM/adv-copy-opt.ll14
1 files changed, 7 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/ARM/adv-copy-opt.ll b/llvm/test/CodeGen/ARM/adv-copy-opt.ll
index f71bf78b62c..395be345720 100644
--- a/llvm/test/CodeGen/ARM/adv-copy-opt.ll
+++ b/llvm/test/CodeGen/ARM/adv-copy-opt.ll
@@ -11,25 +11,25 @@
; r0 = r0 / r2
; r1 = r1 / r3
;
-; NOOPT: vmov [[B:d[0-9]+]], r2, r3
-; NOOPT-NEXT: vmov [[A:d[0-9]+]], r0, r1
+; NOOPT: vmov [[A:d[0-9]+]], r0, r1
+; NOOPT-NEXT: vmov [[B:d[0-9]+]], r2, r3
; Move the low part of B into a register.
; Unfortunately, we cannot express that the 's' register is the low
; part of B, i.e., sIdx == BIdx x 2. E.g., B = d1, B_low = s2.
; NOOPT-NEXT: vmov [[B_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: vmov [[A_LOW:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
; NOOPT-NEXT: vmov [[B_HIGH:r[0-9]+]], s{{[0-9]+}}
+; NOOPT-NEXT: vmov [[A_LOW:r[0-9]+]], s{{[0-9]+}}
; NOOPT-NEXT: vmov [[A_HIGH:r[0-9]+]], s{{[0-9]+}}
-; NOOPT-NEXT: udiv [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
+; NOOPT-NEXT: udiv [[RES_LOW:r[0-9]+]], [[A_LOW]], [[B_LOW]]
; NOOPT-NEXT: vmov.32 [[RES:d[0-9]+]][0], [[RES_LOW]]
+; NOOPT-NEXT: udiv [[RES_HIGH:r[0-9]+]], [[A_HIGH]], [[B_HIGH]]
; NOOPT-NEXT: vmov.32 [[RES]][1], [[RES_HIGH]]
; NOOPT-NEXT: vmov r0, r1, [[RES]]
; NOOPT-NEXT: bx lr
;
; OPT-NOT: vmov
-; OPT: udiv r0, r0, r2
-; OPT-NEXT: udiv r1, r1, r3
+; OPT: udiv r1, r1, r3
+; OPT-NEXT: udiv r0, r0, r2
; OPT-NEXT: bx lr
define <2 x i32> @simpleVectorDiv(<2 x i32> %A, <2 x i32> %B) nounwind {
entry:
OpenPOWER on IntegriCloud