path: root/llvm/test/CodeGen/X86/machine-combiner.ll
author     Sanjay Patel <spatel@rotateright.com>  2015-07-06 22:35:29 +0000
committer  Sanjay Patel <spatel@rotateright.com>  2015-07-06 22:35:29 +0000
commit     681a56ac58681d78fbe273306240ff1e93fc57e9 (patch)
tree       999e302da0abb47ec5667ea77ad7a80fc1a18eab /llvm/test/CodeGen/X86/machine-combiner.ll
parent     8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a (diff)
[x86] extend machine combiner reassociation optimization to SSE scalar adds
Extend the reassociation optimization of http://reviews.llvm.org/rL240361 (D10460) to SSE scalar single-precision FP adds in addition to AVX scalar single-precision FP adds. With the 'switch' in place, we can trivially add other opcodes and test cases in future patches.

Differential Revision: http://reviews.llvm.org/D10975

llvm-svn: 241515
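Illustrative sketch (not part of the patch): under -enable-unsafe-fp-math the machine combiner rebalances the serial fadd chain of reassociate_adds1 below so that the first two adds have no dependence on each other.

; Serial chain as written in the test (each fadd waits on the previous result):
;   %t0 = fadd float %x0, %x1
;   %t1 = fadd float %t0, %x2
;   %t2 = fadd float %t1, %x3
; Reassociated form the combiner produces, matching the checked asm
; (addss %xmm1, %xmm0 and addss %xmm3, %xmm2 can issue independently):
;   %a  = fadd float %x0, %x1
;   %b  = fadd float %x2, %x3
;   %t2 = fadd float %a, %b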
Diffstat (limited to 'llvm/test/CodeGen/X86/machine-combiner.ll')
-rw-r--r--  llvm/test/CodeGen/X86/machine-combiner.ll | 131
1 file changed, 89 insertions(+), 42 deletions(-)
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index d4cd59ffac3..545decb0311 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds1:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds1:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds1:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -17,12 +25,19 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds2:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds2:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds2:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
@@ -30,12 +45,19 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds3:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds3:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds3:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
@@ -43,12 +65,19 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
}
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds4:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds4:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds4:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
@@ -59,16 +88,27 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-LABEL: reassociate_adds5:
-; CHECK: # BB#0:
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
-; CHECK-NEXT: vaddss %xmm6, %xmm1, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm7, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds5:
+; SSE: # BB#0:
+; SSE-NEXT: addss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: addss %xmm5, %xmm4
+; SSE-NEXT: addss %xmm6, %xmm4
+; SSE-NEXT: addss %xmm4, %xmm0
+; SSE-NEXT: addss %xmm7, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds5:
+; AVX: # BB#0:
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
+; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -83,14 +123,21 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
-
+
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds6:
-; CHECK: # BB#0:
-; CHECK-NEXT: vdivss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: retq
+; SSE-LABEL: reassociate_adds6:
+; SSE: # BB#0:
+; SSE-NEXT: divss %xmm1, %xmm0
+; SSE-NEXT: addss %xmm3, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: reassociate_adds6:
+; AVX: # BB#0:
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
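
Illustrative sketch (not part of the test): for reassociate_adds6 above, the comment about the high-latency division means the combiner should move the divide result to the final add, so the independent add no longer waits on it.

; As written: both adds are stuck behind the divide.
;   %t0 = fdiv float %x0, %x1
;   %t1 = fadd float %x2, %t0
;   %t2 = fadd float %x3, %t1
; After reassociation (matches divss / addss %xmm3, %xmm2 / addss %xmm2, %xmm0):
;   %t0 = fdiv float %x0, %x1
;   %s  = fadd float %x2, %x3    ; independent of the divide
;   %t2 = fadd float %t0, %s     ; divide result feeds only the final add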