path: root/llvm/test/CodeGen/X86/fp-fast.ll
author    Sanjay Patel <spatel@rotateright.com>  2015-06-10 20:32:21 +0000
committer Sanjay Patel <spatel@rotateright.com>  2015-06-10 20:32:21 +0000
commit    08829bac8160737d59a5e634758b2dbf18de67f1 (patch)
tree      d32665fe7dd8f774eabd644eace570f572ed469a /llvm/test/CodeGen/X86/fp-fast.ll
parent    e7bd6defd71d24e9b58f18462bb32c92a355c05b (diff)
[x86] Add a reassociation optimization to increase ILP via the MachineCombiner pass
This is a reimplementation of D9780 at the machine instruction level rather than the DAG.

Use the MachineCombiner pass to reassociate scalar single-precision AVX additions (just a starting point; see the TODO comments) to increase ILP when it's safe to do so. The code is closely based on the existing MachineCombiner optimization that is implemented for AArch64.

This patch should not cause the kind of spilling tragedy that led to the reversion of r236031.

Differential Revision: http://reviews.llvm.org/D10321

llvm-svn: 239486
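To make the effect concrete, here is a small illustrative sketch in the style of the tests below (the function name and value names are invented for this note; they are not part of the commit). A left-leaning chain of fadds serializes on each intermediate result:

define float @chain(float %a, float %b, float %c, float %d) {
  %t0 = fadd float %a, %b        ; depends on nothing
  %t1 = fadd float %t0, %c       ; depends on %t0
  %t2 = fadd float %t1, %d       ; depends on %t1
  ret float %t2
}

With reassociation, the machine combiner instead emits (a+b) and (c+d) as independent additions and then sums the two partial results, cutting the critical path from three dependent adds to two:

  vaddss %xmm1, %xmm0, %xmm0     ; a+b
  vaddss %xmm3, %xmm2, %xmm1     ; c+d, independent of the add above
  vaddss %xmm1, %xmm0, %xmm0     ; (a+b)+(c+d)

This is exactly the instruction shape that the CHECK lines in the new tests assert.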
Diffstat (limited to 'llvm/test/CodeGen/X86/fp-fast.ll')
-rw-r--r--  llvm/test/CodeGen/X86/fp-fast.ll | 78
1 file changed, 78 insertions(+), 0 deletions(-)
diff --git a/llvm/test/CodeGen/X86/fp-fast.ll b/llvm/test/CodeGen/X86/fp-fast.ll
index 27af5738ca3..4f503af716a 100644
--- a/llvm/test/CodeGen/X86/fp-fast.ll
+++ b/llvm/test/CodeGen/X86/fp-fast.ll
@@ -114,3 +114,81 @@ define float @test11(float %a) {
ret float %t2
}
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: # BB#0:
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm5, %xmm4, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vaddss %xmm7, %xmm6, %xmm1
+; CHECK-NEXT: vaddss %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
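For reference, tests of this form are driven by llc and FileCheck, and floating-point reassociation is only legal under fast/unsafe FP semantics. The file's actual RUN line sits outside the hunk shown above; an invocation along the following lines would exercise these checks (the flags here are illustrative assumptions, not copied from the file):

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -enable-unsafe-fp-math | FileCheck %s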