From 4ff62b187e7429521bd6a052bd8c68694177a051 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen91@gmail.com>
Date: Wed, 12 Oct 2016 00:48:25 +0000
Subject: [PPCMIPeephole] Fix splat elimination

Summary:
In PPCMIPeephole, when we see two splat instructions, we can't simply do the following transformation:
  B = Splat A
  C = Splat B
=>
  C = Splat A
because B may still be used between these two instructions. Instead, we should make the second Splat a PPC::COPY and let later passes decide whether to remove it or not:
  B = Splat A
  C = Splat B
=>
  B = Splat A
  C = COPY B

Fixes PR30663.

Reviewers: echristo, iteratee, kbarton, nemanjai

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D25493

llvm-svn: 283961
---
 llvm/test/CodeGen/PowerPC/pr30663.ll | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr30663.ll

(limited to 'llvm/test/CodeGen/PowerPC')
diff --git a/llvm/test/CodeGen/PowerPC/pr30663.ll b/llvm/test/CodeGen/PowerPC/pr30663.ll
new file mode 100644
index 00000000000..0772fcaadfe
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr30663.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O1 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+; The second xxspltw should be eliminated.
+; CHECK: xxspltw
+; CHECK-NOT: xxspltw
+define void @Test() {
+bb4:
+  %tmp = load <4 x i8>, <4 x i8>* undef
+  %tmp8 = bitcast <4 x i8> %tmp to float
+  %tmp18 = fmul float %tmp8, undef
+  %tmp19 = fsub float 0.000000e+00, %tmp18
+  store float %tmp19, float* undef
+  %tmp22 = shufflevector <4 x i8> %tmp, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>
+  %tmp25 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> %tmp23, <4 x float> undef)
+  %tmp26 = fsub <4 x float> zeroinitializer, %tmp25
+  %tmp27 = bitcast <4 x float> %tmp26 to <4 x i32>
+  tail call void @llvm.ppc.altivec.stvx(<4 x i32> %tmp27, i8* undef)
+  ret void
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-- 
cgit v1.2.3