path: root/llvm/test/CodeGen/X86/machine-combiner.ll
author     Sanjay Patel <spatel@rotateright.com>  2015-07-06 22:35:29 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2015-07-06 22:35:29 +0000
commit     681a56ac58681d78fbe273306240ff1e93fc57e9 (patch)
tree       999e302da0abb47ec5667ea77ad7a80fc1a18eab /llvm/test/CodeGen/X86/machine-combiner.ll
parent     8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a (diff)
[x86] extend machine combiner reassociation optimization to SSE scalar adds
Extend the reassociation optimization of http://reviews.llvm.org/rL240361 (D10460) to SSE scalar single-precision FP adds in addition to AVX scalar single-precision FP adds. With the 'switch' in place, we can trivially add other opcodes and test cases in future patches.

Differential Revision: http://reviews.llvm.org/D10975

llvm-svn: 241515
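Illustrative sketch (not part of the patch): under -enable-unsafe-fp-math the machine combiner rebalances the serial fadd chain of reassociate_adds1 below so that the first two adds have no dependence on each other.

; Serial chain as written in the test (each fadd waits on the previous result):
;   %t0 = fadd float %x0, %x1
;   %t1 = fadd float %t0, %x2
;   %t2 = fadd float %t1, %x3
; Reassociated form the combiner produces, matching the checked asm
; (addss %xmm1, %xmm0 and addss %xmm3, %xmm2 can issue independently):
;   %a  = fadd float %x0, %x1
;   %b  = fadd float %x2, %x3
;   %t2 = fadd float %a, %b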
Diffstat (limited to 'llvm/test/CodeGen/X86/machine-combiner.ll')
-rw-r--r--  llvm/test/CodeGen/X86/machine-combiner.ll | 131
1 file changed, 89 insertions(+), 42 deletions(-)
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index d4cd59ffac3..545decb0311 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds1:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds1:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds1:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -17,12 +25,19 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds2:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds2:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds2:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
@@ -30,12 +45,19 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds3:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds3:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds3:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
@@ -43,12 +65,19 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds4:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds4:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds4:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
@@ -59,16 +88,27 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-LABEL: reassociate_adds5:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
-; CHECK-NEXT: vaddss %xmm6, %xmm1, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm7, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds5:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: addss %xmm5, %xmm4
+; SSE-NEXT: addss %xmm6, %xmm4
+; SSE-NEXT: addss %xmm4, %xmm0
+; SSE-NEXT: addss %xmm7, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds5:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
+; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -83,14 +123,21 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
-
+
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds6:
-; CHECK: # BB#0:
-; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds6:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds6:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
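
Illustrative sketch (not part of the test): for reassociate_adds6 above, the comment about the high-latency division means the combiner should move the divide result to the final add, so the independent add no longer waits on it.

; As written: both adds are stuck behind the divide.
;   %t0 = fdiv float %x0, %x1
;   %t1 = fadd float %x2, %t0
;   %t2 = fadd float %x3, %t1
; After reassociation (matches divss / addss %xmm3, %xmm2 / addss %xmm2, %xmm0):
;   %t0 = fdiv float %x0, %x1
;   %s  = fadd float %x2, %x3    ; independent of the divide
;   %t2 = fadd float %t0, %s     ; divide result feeds only the final add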