[SLP vectorizer] Allow phi node reordering in tryToVectorizeList.

In tryToVectorizeList, under a very limited circumstance (when entered from tryToVectorizePair), the values may be reordered (swapped) and the SLP tree is built with the new order. This extends that to the case when starting from phis in vectorizeChainsInBlock when there are exactly two phis. The textual order of phi nodes shouldn't really matter. Without this change, the loop body in the accompanying test case is fully vectorized when we swap the order of the phis but not with this order. While this doesn't solve the phi-ordering problem in a general way (for more than 2 phis), this is a simple fix that piggybacks on an existing mechanism and is useful in cases like multiplying two complex numbers. Differential revision: https://reviews.llvm.org/D32065 llvm-svn: 300574
author: Easwaran Raman <eraman@google.com> 2017-04-18 18:16:57 +0000
committer: Easwaran Raman <eraman@google.com> 2017-04-18 18:16:57 +0000
commit: 76aba5f6d79c3f87950da0a78a9ee7dac83d4765 (patch)
tree: 9e0260667695b43a5987c7e5a63caf9e64dd5895 /llvm/test
parent: cc7a035a2e352a1147f034b66ee4ef276109d262 (diff)
download: bcm5719-llvm-76aba5f6d79c3f87950da0a78a9ee7dac83d4765.tar.gz
bcm5719-llvm-76aba5f6d79c3f87950da0a78a9ee7dac83d4765.zip
1 files changed, 54 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
new file mode 100644
index 00000000000..f7f58d7350b
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_phi.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -basicaa -slp-vectorizer  -S -mtriple=x86_64-unknown -mcpu=corei7-avx | FileCheck %s
+
+%struct.complex = type { float, float }
+
+; CHECK-LABEL: void @foo
+define  void @foo (%struct.complex* %A, %struct.complex* %B, %struct.complex* %Result) {
+; Accumulates the products A[i] * B[i] for i = 0..255 and stores the two running sums into %Result (field 0 taken as the real part, field 1 as the imaginary part).
+entry:
+  %0 = add i64 256, 0                 ; loop bound: the loop exits once %20 == 256
+  br label %loop
+
+; CHECK-LABEL: loop
+; CHECK: [[REG0:%[0-9]+]] = phi <2 x float> {{.*}}[ [[REG1:%[0-9]+]], %loop ]
+; CHECK: [[REG2:%[0-9]+]] = load <2 x float>, <2 x float>*
+; CHECK: [[REG3:%[0-9]+]] = fmul <2 x float> [[REG2]]
+; CHECK: [[REG4:%[0-9]+]] = fmul <2 x float>
+; CHECK: fsub <2 x float> [[REG3]], [[REG4]]
+; CHECK: fadd <2 x float> [[REG3]], [[REG4]]
+; CHECK: shufflevector <2 x float>
+; CHECK: [[REG1]] = fadd <2 x float>{{.*}}[[REG0]]
+loop:
+; The two float phis appear in the opposite order to the fields they feed: %2 carries the field-1 sum (%19) and %3 carries the field-0 sum (%18).
+  %1 = phi i64 [ 0, %entry ], [ %20, %loop ]                  ; element index i
+  %2 = phi float [ 0.000000e+00, %entry ], [ %19, %loop ]     ; running sum stored to field 1 at exit
+  %3 = phi float [ 0.000000e+00, %entry ], [ %18, %loop ]     ; running sum stored to field 0 at exit
+  %4 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 0
+  %5 = load float, float* %4, align 4      ; A[i] field 0
+  %6 = getelementptr inbounds %"struct.complex", %"struct.complex"* %A, i64 %1, i32 1
+  %7 = load float, float* %6, align 4      ; A[i] field 1
+  %8 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 0
+  %9 = load float, float* %8, align 4      ; B[i] field 0
+  %10 = getelementptr inbounds %"struct.complex", %"struct.complex"* %B, i64 %1, i32 1
+  %11 = load float, float* %10, align 4    ; B[i] field 1
+  %12 = fmul float %5, %9                  ; A.0 * B.0
+  %13 = fmul float %7, %11                 ; A.1 * B.1
+  %14 = fsub float %12, %13                ; field-0 component of A[i] * B[i]
+  %15 = fmul float %7, %9                  ; A.1 * B.0
+  %16 = fmul float %5, %11                 ; A.0 * B.1
+  %17 = fadd float %15, %16                ; field-1 component of A[i] * B[i]
+  %18 = fadd float %3, %14                 ; updated field-0 sum
+  %19 = fadd float %2, %17                 ; updated field-1 sum
+  %20 = add nuw nsw i64 %1, 1              ; i + 1
+  %21 = icmp eq i64 %20, %0                ; true after the iteration with i == 255
+  br i1 %21, label %exit, label %loop
+
+exit:
+  %22 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result,  i32 0, i32 0
+  store float %18, float* %22, align 4
+  %23 = getelementptr inbounds %"struct.complex", %"struct.complex"* %Result,  i32 0, i32 1
+  store float %19, float* %23, align 4
+
+  ret void
+
+}
author	Easwaran Raman <eraman@google.com>	2017-04-18 18:16:57 +0000
committer	Easwaran Raman <eraman@google.com>	2017-04-18 18:16:57 +0000
commit	76aba5f6d79c3f87950da0a78a9ee7dac83d4765 (patch)
tree	9e0260667695b43a5987c7e5a63caf9e64dd5895 /llvm/test
parent	cc7a035a2e352a1147f034b66ee4ef276109d262 (diff)
download	bcm5719-llvm-76aba5f6d79c3f87950da0a78a9ee7dac83d4765.tar.gz bcm5719-llvm-76aba5f6d79c3f87950da0a78a9ee7dac83d4765.zip