author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-11-24 20:31:46 +0000
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-11-24 20:31:46 +0000
commit | 1b4fecb098a5407734128d628733d1fbd86e054d (patch)
tree | c83d383022a41d6f007800e7d964957967ab7417 /llvm/test/CodeGen/X86/fma_patterns.ll
parent | 29ffb68259bfefe0a2ec649f0912cba4f71e85ba (diff)
[X86][FMA] Optimize FNEG(FMA) Patterns
X86 has to use its own FMA opcodes, which prevents the standard FNEG(FMA) pattern-table recognition method used by other targets. This patch adds support for lowering FNEG(FMA(X,Y,Z)) into a single, suitably negated FMA instruction.
Fix for PR24364
Differential Revision: http://reviews.llvm.org/D14906
llvm-svn: 254016
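Why the rewrite is safe: IEEE-754 negation only flips the sign bit and is always exact, so negating a fused result is equivalent to fusing with negated operands. As a quick sanity check (an editor's sketch, not part of the commit), the standalone C program below verifies all four negated-FMA rewrites exercised by the new tests, using the standard C99 `fma()` from `<math.h>`; compile with something like `cc check.c -lm`.

```c
#include <assert.h>
#include <math.h>

int main(void) {
    /* Values chosen so every product and sum is exactly
       representable; the identities then hold bit-for-bit. */
    double x = 1.5, y = -2.25, z = 0.125;

    /* fneg(fmadd(x,y,z))  -> fnmsub:  -(x*y + z)    == x*(-y) - z */
    assert(-fma(x, y, z) == fma(x, -y, -z));

    /* fneg(fmsub(x,y,z))  -> fnmadd:  -(x*y - z)    == x*(-y) + z */
    assert(-fma(x, y, -z) == fma(x, -y, z));

    /* fneg(fnmadd(x,y,z)) -> fmsub:   -(-(x*y) + z) == x*y - z    */
    assert(-fma(-x, y, z) == fma(x, y, -z));

    /* fneg(fnmsub(x,y,z)) -> fmadd:   -(-(x*y) - z) == x*y + z    */
    assert(-fma(-x, y, -z) == fma(x, y, z));

    return 0;
}
```

Each assertion matches one of the four new test functions in the diff below: fneg of fmadd selects the fnmsub instruction, fneg of fmsub selects fnmadd, and vice versa.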
Diffstat (limited to 'llvm/test/CodeGen/X86/fma_patterns.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/fma_patterns.ll | 68 |
1 file changed, 68 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index c8667d7b8ad..9c8b12d63fa 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -568,6 +568,74 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
   ret <4 x double> %r
 }
 
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %add = fadd <4 x float> %mul, %a2
+  %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %sub = fsub <4 x double> %mul, %a2
+  %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x float> %a0, %a1
+  %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul
+  %add = fadd <4 x float> %neg0, %a2
+  %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add
+  ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA:       # BB#0:
+; CHECK_FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT:    retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4:       # BB#0:
+; CHECK_FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT:    retq
+  %mul = fmul <4 x double> %a0, %a1
+  %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul
+  %sub = fsub <4 x double> %neg0, %a2
+  %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub
+  ret <4 x double> %neg1
+}
+
 ; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
 
 define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
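One note on the IR in the new tests: at the time of this commit LLVM IR had no dedicated `fneg` instruction (it only arrived later, in LLVM 8), so each negation is written as an `fsub` from a splat of `-0.0`, the canonical fneg idiom that the new combine has to match.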