[SystemZ] Avoid LER on z13 due to partial register dependencies

On the z13, it turns out to be more efficient to access a full floating-point register than just the upper half (as done e.g. by the LE and LER instructions).

Current code already takes this into account when loading from memory by using the LDE instruction in place of LE. However, we still generate LER, which shows the same performance issues as LE in certain circumstances.

This patch changes the back-end to emit LDR instead of LER to implement FP32 register-to-register copies on z13.

llvm-svn: 263431
author: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2016-03-14 13:50:03 +0000
committer: Ulrich Weigand <ulrich.weigand@de.ibm.com> 2016-03-14 13:50:03 +0000
commit: cdce026b4d44deee16f982f5db6afdb8215d77c1 (patch)
tree: 79e976cb96f51b6800f4faca899e0c9591a689a0 /llvm/test
parent: 00bd82cadec3a23fa524344366baa239877de6e7 (diff)
download: bcm5719-llvm-cdce026b4d44deee16f982f5db6afdb8215d77c1.tar.gz
bcm5719-llvm-cdce026b4d44deee16f982f5db6afdb8215d77c1.zip
3 files changed, 34 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-01.ll b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
index 843b1b6a6e6..55c09e5d779 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
@@ -1,7 +1,6 @@
 ; Test moves between FPRs.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32 moves.
 define float @f1(float %a, float %b) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-12.ll b/llvm/test/CodeGen/SystemZ/fp-move-12.ll
new file mode 100644
index 00000000000..131f7c374ca
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-move-12.ll
@@ -0,0 +1,33 @@
+; Test moves between FPRs on z13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that we use LDR instead of LER.
+define float @f1(float %a, float %b) {
+; CHECK-LABEL: f1:
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  ret float %b
+}
+
+; Test f64 moves.
+define double @f2(double %a, double %b) {
+; CHECK-LABEL: f2:
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+  ret double %b
+}
+
+; Test f128 moves.  Since f128s are passed by reference, we need to force
+; a copy by other means.
+define void @f3(fp128 *%x) {
+; CHECK-LABEL: f3:
+; CHECK: lxr
+; CHECK: axbr
+; CHECK: br %r14
+  %val = load volatile fp128 , fp128 *%x
+  %sum = fadd fp128 %val, %val
+  store volatile fp128 %sum, fp128 *%x
+  store volatile fp128 %val, fp128 *%x
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
index 4afad8bef65..9829bd02433 100644
--- a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -52,7 +52,7 @@ define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
 ; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
 ; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
 ; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
-; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]]
+; CHECK-DAG: ldr %f[[A1copy:[0-5]]], %f[[A1]]
 ; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
 ; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
 ; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
author	Ulrich Weigand <ulrich.weigand@de.ibm.com>	2016-03-14 13:50:03 +0000
committer	Ulrich Weigand <ulrich.weigand@de.ibm.com>	2016-03-14 13:50:03 +0000
commit	cdce026b4d44deee16f982f5db6afdb8215d77c1 (patch)
tree	79e976cb96f51b6800f4faca899e0c9591a689a0 /llvm/test
parent	00bd82cadec3a23fa524344366baa239877de6e7 (diff)
download	bcm5719-llvm-cdce026b4d44deee16f982f5db6afdb8215d77c1.tar.gz bcm5719-llvm-cdce026b4d44deee16f982f5db6afdb8215d77c1.zip