path: root/llvm/test/CodeGen/X86/fp-fast.ll
author    Sanjay Patel <spatel@rotateright.com>  2015-06-10 20:32:21 +0000
committer Sanjay Patel <spatel@rotateright.com>  2015-06-10 20:32:21 +0000
commit    08829bac8160737d59a5e634758b2dbf18de67f1 (patch)
tree      d32665fe7dd8f774eabd644eace570f572ed469a /llvm/test/CodeGen/X86/fp-fast.ll
parent    e7bd6defd71d24e9b58f18462bb32c92a355c05b (diff)
[x86] Add a reassociation optimization to increase ILP via the MachineCombiner pass
This is a reimplementation of D9780 at the machine instruction level rather than the DAG.

Use the MachineCombiner pass to reassociate scalar single-precision AVX additions (just a starting point; see the TODO comments) to increase ILP when it's safe to do so. The code is closely based on the existing MachineCombiner optimization that is implemented for AArch64.

This patch should not cause the kind of spilling tragedy that led to the reversion of r236031.

Differential Revision: http://reviews.llvm.org/D10321

llvm-svn: 239486
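To make the effect concrete, here is a small illustrative sketch in the style of the tests below (the function name and value names are invented for this note; they are not part of the commit). A left-leaning chain of fadds serializes on each intermediate result:

define float @chain(float %a, float %b, float %c, float %d) {
  %t0 = fadd float %a, %b        ; depends on nothing
  %t1 = fadd float %t0, %c       ; depends on %t0
  %t2 = fadd float %t1, %d       ; depends on %t1
  ret float %t2
}

With reassociation, the machine combiner instead emits (a+b) and (c+d) as independent additions and then sums the two partial results, cutting the critical path from three dependent adds to two:

  vaddss %xmm1, %xmm0, %xmm0     ; a+b
  vaddss %xmm3, %xmm2, %xmm1     ; c+d, independent of the add above
  vaddss %xmm1, %xmm0, %xmm0     ; (a+b)+(c+d)

This is exactly the instruction shape that the CHECK lines in the new tests assert.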
Diffstat (limited to 'llvm/test/CodeGen/X86/fp-fast.ll')
-rw-r--r--  llvm/test/CodeGen/X86/fp-fast.ll | 78
1 file changed, 78 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/fp-fast.ll b/llvm/test/CodeGen/X86/fp-fast.ll
index 27af5738ca3..4f503af716a 100644
--- a/llvm/test/CodeGen/X86/fp-fast.ll
+++ b/llvm/test/CodeGen/X86/fp-fast.ll
@@ -114,3 +114,81 @@ define float @test11(float %a) {
ret float %t2
}
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm7, %xmm6, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
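For reference, tests of this form are driven by llc and FileCheck, and floating-point reassociation is only legal under fast/unsafe FP semantics. The file's actual RUN line sits outside the hunk shown above; an invocation along the following lines would exercise these checks (the flags here are illustrative assumptions, not copied from the file):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -enable-unsafe-fp-math | FileCheck %s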