From 4ff62b187e7429521bd6a052bd8c68694177a051 Mon Sep 17 00:00:00 2001
From: Tim Shen
Date: Wed, 12 Oct 2016 00:48:25 +0000
Subject: [PPCMIPeephole] Fix splat elimination

Summary:
In PPCMIPeephole, when we see two splat instructions, we can't simply do the
following transformation:
  B = Splat A
  C = Splat B
=>
  C = Splat A
because B may still be used between these two instructions. Instead, we should
make the second Splat a PPC::COPY and let later passes decide whether to remove
it or not:
  B = Splat A
  C = Splat B
=>
  B = Splat A
  C = COPY B

Fixes PR30663.

Reviewers: echristo, iteratee, kbarton, nemanjai

Subscribers: mehdi_amini, llvm-commits

Differential Revision: https://reviews.llvm.org/D25493

llvm-svn: 283961
---
 llvm/test/CodeGen/PowerPC/pr30663.ll | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/pr30663.ll

(limited to 'llvm/test/CodeGen/PowerPC')

diff --git a/llvm/test/CodeGen/PowerPC/pr30663.ll b/llvm/test/CodeGen/PowerPC/pr30663.ll
new file mode 100644
index 00000000000..0772fcaadfe
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr30663.ll
@@ -0,0 +1,24 @@
+; RUN: llc -O1 < %s | FileCheck %s
+target triple = "powerpc64le-linux-gnu"
+
+; The second xxspltw should be eliminated.
+; CHECK: xxspltw
+; CHECK-NOT: xxspltw
+define void @Test() {
+bb4:
+  %tmp = load <4 x i8>, <4 x i8>* undef
+  %tmp8 = bitcast <4 x i8> %tmp to float
+  %tmp18 = fmul float %tmp8, undef
+  %tmp19 = fsub float 0.000000e+00, %tmp18
+  store float %tmp19, float* undef
+  %tmp22 = shufflevector <4 x i8> %tmp, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %tmp23 = bitcast <16 x i8> %tmp22 to <4 x float>
+  %tmp25 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> %tmp23, <4 x float> undef)
+  %tmp26 = fsub <4 x float> zeroinitializer, %tmp25
+  %tmp27 = bitcast <4 x float> %tmp26 to <4 x i32>
+  tail call void @llvm.ppc.altivec.stvx(<4 x i32> %tmp27, i8* undef)
+  ret void
+}
+
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*)
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-- 
cgit v1.2.3