From 1b4fecb098a5407734128d628733d1fbd86e054d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 24 Nov 2015 20:31:46 +0000
Subject: [X86][FMA] Optimize FNEG(FMA) Patterns

X86 needs to use its own FMA opcodes, preventing the standard FNEG(FMA) pattern table recognition method used by other platforms. This patch adds support for lowering FNEG(FMA(X,Y,Z)) into a single suitably negated FMA instruction.

Fix for PR24364

Differential Revision: http://reviews.llvm.org/D14906

llvm-svn: 254016
---
 llvm/test/CodeGen/X86/fma_patterns.ll | 68 +++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

(limited to 'llvm/test/CodeGen/X86/fma_patterns.ll')

diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index c8667d7b8ad..9c8b12d63fa 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -568,6 +568,74 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
   ret <4 x double> %r
 }
 
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %add = fadd <4 x float> %mul, %a2
+  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %sub = fsub <4 x double> %mul, %a2
+  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd <4 x float> %neg0, %a2
+  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub <4 x double> %neg0, %a2
+  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  ret <4 x double> %neg1
+}
+
 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 
 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
-- 
cgit v1.2.3