| field | value | date |
|---|---|---|
| author | Sanjay Patel <spatel@rotateright.com> | 2015-07-06 22:35:29 +0000 |
| committer | Sanjay Patel <spatel@rotateright.com> | 2015-07-06 22:35:29 +0000 |
| commit | 681a56ac58681d78fbe273306240ff1e93fc57e9 (patch) | |
| tree | 999e302da0abb47ec5667ea77ad7a80fc1a18eab /llvm/test/CodeGen/X86/machine-combiner.ll | |
| parent | 8fbf1c1f4a6ffe8f070b23f41cfa2df668f4913a (diff) | |
[x86] extend machine combiner reassociation optimization to SSE scalar adds
Extend the reassociation optimization of http://reviews.llvm.org/rL240361 (D10460)
to SSE scalar single-precision FP adds, in addition to the AVX scalar single-precision
FP adds it already covers.
With the 'switch' in place, we can trivially add other opcodes and test cases in
future patches.
Differential Revision: http://reviews.llvm.org/D10975
llvm-svn: 241515
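
Below is a rough sketch of the kind of opcode 'switch' the message refers to. The C++ side of the patch is not shown on this page, so the function name and exact shape here are hypothetical; `X86::ADDSSrr` and `X86::VADDSSrr` are the X86 machine-opcode enums for the SSE and AVX scalar single-precision adds.

```cpp
// Sketch only -- not the actual patch. It illustrates the opcode 'switch'
// the commit message describes: a single predicate over machine opcodes,
// extended one case at a time as more operations are allowed to reassociate.
static bool isReassociationCandidate(unsigned Opcode) { // hypothetical name
  switch (Opcode) {
  case X86::ADDSSrr:  // SSE scalar single-precision FP add (new in this patch)
  case X86::VADDSSrr: // AVX scalar single-precision FP add (already handled)
    return true;
  default:
    return false;
  }
}
```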
Diffstat (limited to 'llvm/test/CodeGen/X86/machine-combiner.ll')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/X86/machine-combiner.ll | 131 |
1 file changed, 89 insertions, 42 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index d4cd59ffac3..545decb0311 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,15 +1,23 @@
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
 
 ; Verify that the first two adds are independent regardless of how the inputs are
 ; commuted. The destination registers are used as source registers for the third add.
 
 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds1:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds1:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds1:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fadd float %x0, %x1
   %t1 = fadd float %t0, %x2
   %t2 = fadd float %t1, %x3
@@ -17,12 +25,19 @@ define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
 }
 
 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds2:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds2:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds2:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fadd float %x0, %x1
   %t1 = fadd float %x2, %t0
   %t2 = fadd float %t1, %x3
@@ -30,12 +45,19 @@ define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
 }
 
 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds3:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds3:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds3:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fadd float %x0, %x1
   %t1 = fadd float %t0, %x2
   %t2 = fadd float %x3, %t1
@@ -43,12 +65,19 @@ define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
 }
 
 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds4:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds4:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds4:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fadd float %x0, %x1
   %t1 = fadd float %x2, %t0
   %t2 = fadd float %x3, %t1
@@ -59,16 +88,27 @@ define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
 ; produced because that would cost more compile time.
 
 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-LABEL: reassociate_adds5:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm5, %xmm4, %xmm1
-; CHECK-NEXT:    vaddss %xmm6, %xmm1, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm7, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds5:
+; SSE:       # BB#0:
+; SSE-NEXT:    addss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    addss %xmm5, %xmm4
+; SSE-NEXT:    addss %xmm6, %xmm4
+; SSE-NEXT:    addss %xmm4, %xmm0
+; SSE-NEXT:    addss %xmm7, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds5:
+; AVX:       # BB#0:
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
+; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fadd float %x0, %x1
   %t1 = fadd float %t0, %x2
   %t2 = fadd float %t1, %x3
@@ -83,14 +123,21 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
 ; Also, we should reassociate such that the result of the high latency division
 ; is used by the final 'add' rather than reassociating the %x3 operand with the
 ; division. The latter reassociation would not improve anything.
-
+
 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-LABEL: reassociate_adds6:
-; CHECK:       # BB#0:
-; CHECK-NEXT:    vdivss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    vaddss %xmm3, %xmm2, %xmm1
-; CHECK-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; CHECK-NEXT:    retq
+; SSE-LABEL: reassociate_adds6:
+; SSE:       # BB#0:
+; SSE-NEXT:    divss %xmm1, %xmm0
+; SSE-NEXT:    addss %xmm3, %xmm2
+; SSE-NEXT:    addss %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: reassociate_adds6:
+; AVX:       # BB#0:
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
+; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %t0 = fdiv float %x0, %x1
   %t1 = fadd float %x2, %t0
   %t2 = fadd float %x3, %t1
```
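A note on the SSE output above: SSE adds use a destructive two-operand form, so the second independent add leaves its result in %xmm2 and the final add reads %xmm2, whereas the three-operand AVX form can steer the same intermediate result into %xmm1. The reassociation payoff is identical either way: a serial chain ((x0+x1)+x2)+x3 has a critical path of three dependent adds, while the reassociated (x0+x1)+(x2+x3) needs only two, e.g. 6 cycles instead of 9 at an illustrative 3-cycle add latency.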

