summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/fma_patterns.ll
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2015-11-24 20:31:46 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2015-11-24 20:31:46 +0000
commit1b4fecb098a5407734128d628733d1fbd86e054d (patch)
treec83d383022a41d6f007800e7d964957967ab7417 /llvm/test/CodeGen/X86/fma_patterns.ll
parent29ffb68259bfefe0a2ec649f0912cba4f71e85ba (diff)
downloadbcm5719-llvm-1b4fecb098a5407734128d628733d1fbd86e054d.tar.gz
bcm5719-llvm-1b4fecb098a5407734128d628733d1fbd86e054d.zip
[X86][FMA] Optimize FNEG(FMA) Patterns
X86 needs to use its own FMA opcodes, which prevents the standard FNEG(FMA) pattern-table recognition method used by other platforms. This patch adds support for lowering FNEG(FMA(X,Y,Z)) into a single, suitably negated FMA instruction. Fix for PR24364. Differential Revision: http://reviews.llvm.org/D14906. llvm-svn: 254016
Diffstat (limited to 'llvm/test/CodeGen/X86/fma_patterns.ll')
-rw-r--r--llvm/test/CodeGen/X86/fma_patterns.ll68
1 files changed, 68 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index c8667d7b8ad..9c8b12d63fa 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -568,6 +568,74 @@ define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x do
ret <4 x double> %r
}
+; (fneg (fma x, y, z)) -> (fma x, -y, -z)
+
+define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; computes -(a0*a1 + a2); the checks below expect one vfnmsub and nothing else before retq
+; CHECK_FMA-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1 ; a0 * a1
+ %add = fadd <4 x float> %mul, %a2 ; a0*a1 + a2 (the fmadd shape)
+ %neg = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; -0.0 - x negates the whole sum: -(a0*a1) - a2
+ ret <4 x float> %neg
+}
+
+define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; computes -(a0*a1 - a2); the checks below expect one vfnmadd and nothing else before retq
+; CHECK_FMA-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1 ; a0 * a1
+ %sub = fsub <4 x double> %mul, %a2 ; a0*a1 - a2 (the fmsub shape)
+ %neg = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; -0.0 - x negates the whole difference: -(a0*a1) + a2
+ ret <4 x double> %neg
+}
+
+define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { ; computes -(-(a0*a1) + a2); the checks below expect one vfmsub and nothing else before retq
+; CHECK_FMA-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f32_fneg_fnmadd:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x float> %a0, %a1 ; a0 * a1
+ %neg0 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul ; inner negation: -(a0*a1)
+ %add = fadd <4 x float> %neg0, %a2 ; -(a0*a1) + a2 (the fnmadd shape)
+ %neg1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add ; outer negation cancels the inner one: a0*a1 - a2
+ ret <4 x float> %neg1
+}
+
+define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { ; computes -(-(a0*a1) - a2); the checks below expect one vfmadd and nothing else before retq
+; CHECK_FMA-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA: # BB#0:
+; CHECK_FMA-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0
+; CHECK_FMA-NEXT: retq
+;
+; CHECK_FMA4-LABEL: test_v4f64_fneg_fnmsub:
+; CHECK_FMA4: # BB#0:
+; CHECK_FMA4-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
+; CHECK_FMA4-NEXT: retq
+ %mul = fmul <4 x double> %a0, %a1 ; a0 * a1
+ %neg0 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul ; inner negation: -(a0*a1)
+ %sub = fsub <4 x double> %neg0, %a2 ; -(a0*a1) - a2 (the fnmsub shape)
+ %neg1 = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub ; outer negation cancels both signs: a0*a1 + a2
+ ret <4 x double> %neg1
+}
+
; (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 {
OpenPOWER on IntegriCloud