summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
author: Tim Shen <timshen91@gmail.com> 2016-10-12 00:48:25 +0000
committer: Tim Shen <timshen91@gmail.com> 2016-10-12 00:48:25 +0000
commit: 4ff62b187e7429521bd6a052bd8c68694177a051 (patch)
tree: 53a8648cfeb7c82eee64f82e166108565087299a /llvm/test/CodeGen/PowerPC
parent: e04aebe904aea62e8079db4ac7f38585188ec47c (diff)
download: bcm5719-llvm-4ff62b187e7429521bd6a052bd8c68694177a051.tar.gz
bcm5719-llvm-4ff62b187e7429521bd6a052bd8c68694177a051.zip
[PPCMIPeephole] Fix splat elimination
Summary:
In PPCMIPeephole, when we see two splat instructions, we can't simply do the
following transformation:

  B = Splat A
  C = Splat B
=>
  C = Splat A

because B may still be used between these two instructions. Instead, we should
make the second Splat a PPC::COPY and let later passes decide whether to remove
it or not:

  B = Splat A
  C = Splat B
=>
  B = Splat A
  C = COPY B

Fixes PR30663.

Reviewers: echristo, iteratee, kbarton, nemanjai

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D25493

llvm-svn: 283961
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r-- llvm/test/CodeGen/PowerPC/pr30663.ll | 24
1 files changed, 24 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/pr30663.ll b/llvm/test/CodeGen/PowerPC/pr30663.ll
new file mode 100644
index 00000000000..0772fcaadfe
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr30663.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O1 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+; The second xxspltw should be eliminated.
+; CHECK: xxspltw
+; CHECK-NOT: xxspltw
+define void @Test() {
+bb4:
+ %tmp = load <4 x i8>, <4 x i8>* undef
+ %tmp8 = bitcast <4 x i8> %tmp to float
+ %tmp18 = fmul float %tmp8, undef
+ %tmp19 = fsub float 0.000000e+00, %tmp18
+ store float %tmp19, float* undef
+ %tmp22 = shufflevector <4 x i8> %tmp, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>
+ %tmp25 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> %tmp23, <4 x float> undef)
+ %tmp26 = fsub <4 x float> zeroinitializer, %tmp25
+ %tmp27 = bitcast <4 x float> %tmp26 to <4 x i32>
+ tail call void @llvm.ppc.altivec.stvx(<4 x i32> %tmp27, i8* undef)
+ ret void
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
OpenPOWER on IntegriCloud