summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author Stephen Lin <stephenwlin@gmail.com> 2013-07-09 18:16:56 +0000
committer Stephen Lin <stephenwlin@gmail.com> 2013-07-09 18:16:56 +0000
commit 73de7bf5dec63e8ca45a446373ab61a2e22d103c (patch)
tree 61dd22e9276131538d96c046212d55199febb05b /llvm/test/CodeGen
parent ff666bd962a4446d80955fe75619201c29795501 (diff)
download bcm5719-llvm-73de7bf5dec63e8ca45a446373ab61a2e22d103c.tar.gz
bcm5719-llvm-73de7bf5dec63e8ca45a446373ab61a2e22d103c.zip
AArch64/PowerPC/SystemZ/X86: This patch fixes the interface, usage, and all
in-tree implementations of TargetLoweringBase::isFMAFasterThanMulAndAdd in order to resolve the following issues with fmuladd (i.e. optional FMA) intrinsics:
1. On X86(-64) targets, ISD::FMA nodes are formed when lowering fmuladd intrinsics even if the subtarget does not support FMA instructions, leading to laughably bad code generation in some situations.
2. On AArch64 targets, ISD::FMA nodes are formed for operations on fp128, resulting in a call to a software fp128 FMA implementation.
3. On PowerPC targets, FMAs are not generated from fmuladd intrinsics on types like v2f32, v8f32, v4f64, etc., even though they promote, split, scalarize, etc. to types that support hardware FMAs.
The function has also been slightly renamed for consistency and to force a merge/build conflict for any out-of-tree target implementing it. To resolve, see comments and fixed in-tree examples.
llvm-svn: 185956
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AArch64/fp-dp3.ll 34
-rw-r--r-- llvm/test/CodeGen/AArch64/illegal-float-ops.ll 26
-rw-r--r-- llvm/test/CodeGen/PowerPC/vec_fmuladd.ll 56
-rw-r--r-- llvm/test/CodeGen/X86/extended-fma-contraction.ll 22
-rw-r--r-- llvm/test/CodeGen/X86/fma_patterns_wide.ll 84
-rw-r--r-- llvm/test/CodeGen/X86/wide-fma-contraction.ll 14
6 files changed, 233 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/AArch64/fp-dp3.ll b/llvm/test/CodeGen/AArch64/fp-dp3.ll
index 39db9be1577..f372c43159b 100644
--- a/llvm/test/CodeGen/AArch64/fp-dp3.ll
+++ b/llvm/test/CodeGen/AArch64/fp-dp3.ll
@@ -1,102 +1,136 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST
declare float @llvm.fma.f32(float, float, float)
declare double @llvm.fma.f64(double, double, double)
define float @test_fmadd(float %a, float %b, float %c) {
; CHECK: test_fmadd:
+; CHECK-NOFAST: test_fmadd:
%val = call float @llvm.fma.f32(float %a, float %b, float %c)
; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %val
}
define float @test_fmsub(float %a, float %b, float %c) {
; CHECK: test_fmsub:
+; CHECK-NOFAST: test_fmsub:
%nega = fsub float -0.0, %a
%val = call float @llvm.fma.f32(float %nega, float %b, float %c)
; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %val
}
define float @test_fnmadd(float %a, float %b, float %c) {
; CHECK: test_fnmadd:
+; CHECK-NOFAST: test_fnmadd:
%negc = fsub float -0.0, %c
%val = call float @llvm.fma.f32(float %a, float %b, float %negc)
; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %val
}
define float @test_fnmsub(float %a, float %b, float %c) {
; CHECK: test_fnmsub:
+; CHECK-NOFAST: test_fnmsub:
%nega = fsub float -0.0, %a
%negc = fsub float -0.0, %c
%val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %val
}
define double @testd_fmadd(double %a, double %b, double %c) {
; CHECK: testd_fmadd:
+; CHECK-NOFAST: testd_fmadd:
%val = call double @llvm.fma.f64(double %a, double %b, double %c)
; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
ret double %val
}
define double @testd_fmsub(double %a, double %b, double %c) {
; CHECK: testd_fmsub:
+; CHECK-NOFAST: testd_fmsub:
%nega = fsub double -0.0, %a
%val = call double @llvm.fma.f64(double %nega, double %b, double %c)
; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
ret double %val
}
define double @testd_fnmadd(double %a, double %b, double %c) {
; CHECK: testd_fnmadd:
+; CHECK-NOFAST: testd_fnmadd:
%negc = fsub double -0.0, %c
%val = call double @llvm.fma.f64(double %a, double %b, double %negc)
; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
ret double %val
}
define double @testd_fnmsub(double %a, double %b, double %c) {
; CHECK: testd_fnmsub:
+; CHECK-NOFAST: testd_fnmsub:
%nega = fsub double -0.0, %a
%negc = fsub double -0.0, %c
%val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
ret double %val
}
define float @test_fmadd_unfused(float %a, float %b, float %c) {
; CHECK: test_fmadd_unfused:
+; CHECK-NOFAST: test_fmadd_unfused:
%prod = fmul float %b, %c
%sum = fadd float %a, %prod
; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %sum
}
define float @test_fmsub_unfused(float %a, float %b, float %c) {
; CHECK: test_fmsub_unfused:
+; CHECK-NOFAST: test_fmsub_unfused:
%prod = fmul float %b, %c
%diff = fsub float %a, %prod
; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %diff
}
define float @test_fnmadd_unfused(float %a, float %b, float %c) {
; CHECK: test_fnmadd_unfused:
+; CHECK-NOFAST: test_fnmadd_unfused:
%nega = fsub float -0.0, %a
%prod = fmul float %b, %c
%sum = fadd float %nega, %prod
; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %sum
}
define float @test_fnmsub_unfused(float %a, float %b, float %c) {
; CHECK: test_fnmsub_unfused:
+; CHECK-NOFAST: test_fnmsub_unfused:
%nega = fsub float -0.0, %a
%prod = fmul float %b, %c
%diff = fsub float %nega, %prod
; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fneg {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
ret float %diff
}
diff --git a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
index 446151b8ffa..a398f7bff61 100644
--- a/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-float-ops.ll
@@ -219,3 +219,29 @@ define void @test_frem(float %float, double %double, fp128 %fp128) {
ret void
}
+
+declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
+
+define void @test_fma(fp128 %fp128) {
+; CHECK: test_fma:
+
+ %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+ store fp128 %fmafp128, fp128* @varfp128
+; CHECK: bl fmal
+
+ ret void
+}
+
+declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
+
+define void @test_fmuladd(fp128 %fp128) {
+; CHECK: test_fmuladd:
+
+ %fmuladdfp128 = call fp128 @llvm.fmuladd.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+ store fp128 %fmuladdfp128, fp128* @varfp128
+; CHECK-NOT: bl fmal
+; CHECK: bl __multf3
+; CHECK: bl __addtf3
+
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/vec_fmuladd.ll b/llvm/test/CodeGen/PowerPC/vec_fmuladd.ll
new file mode 100644
index 00000000000..b1bc377facf
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vec_fmuladd.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float> %val, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float> %val, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float> %val, <8 x float>, <8 x float>)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double> %val, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double> %val, <4 x double>, <4 x double>)
+
+define <2 x float> @v2f32_fmuladd(<2 x float> %x) nounwind readnone {
+entry:
+ %fmuladd = call <2 x float> @llvm.fmuladd.v2f32 (<2 x float> %x, <2 x float> %x, <2 x float> %x)
+ ret <2 x float> %fmuladd
+}
+; fmuladd (<2 x float>) is promoted to fmuladd (<4 x float>)
+; CHECK: v2f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_fmuladd(<4 x float> %x) nounwind readnone {
+entry:
+ %fmuladd = call <4 x float> @llvm.fmuladd.v4f32 (<4 x float> %x, <4 x float> %x, <4 x float> %x)
+ ret <4 x float> %fmuladd
+}
+; CHECK: v4f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_fmuladd(<8 x float> %x) nounwind readnone {
+entry:
+ %fmuladd = call <8 x float> @llvm.fmuladd.v8f32 (<8 x float> %x, <8 x float> %x, <8 x float> %x)
+ ret <8 x float> %fmuladd
+}
+; CHECK: v8f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_fmuladd(<2 x double> %x) nounwind readnone {
+entry:
+ %fmuladd = call <2 x double> @llvm.fmuladd.v2f64 (<2 x double> %x, <2 x double> %x, <2 x double> %x)
+ ret <2 x double> %fmuladd
+}
+; CHECK: v2f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_fmuladd(<4 x double> %x) nounwind readnone {
+entry:
+ %fmuladd = call <4 x double> @llvm.fmuladd.v4f64 (<4 x double> %x, <4 x double> %x, <4 x double> %x)
+ ret <4 x double> %fmuladd
+}
+; CHECK: v4f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
diff --git a/llvm/test/CodeGen/X86/extended-fma-contraction.ll b/llvm/test/CodeGen/X86/extended-fma-contraction.ll
new file mode 100644
index 00000000000..ef2c22b9ab8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/extended-fma-contraction.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
+
+; CHECK: fmafunc
+define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
+
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
+ %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
+ ret <3 x float> %ret
+}
+
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/fma_patterns_wide.ll b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
new file mode 100644
index 00000000000..d84e5a08176
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fma_patterns_wide.ll
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
+
+; CHECK: test_x86_fmadd_ps_y_wide
+; CHECK: vfmadd213ps
+; CHECK: vfmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps_y_wide
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ %x = fmul <16 x float> %a0, %a1
+ %res = fadd <16 x float> %x, %a2
+ ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps_y_wide
+; CHECK: vfmsub213ps
+; CHECK: vfmsub213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps_y_wide
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ %x = fmul <16 x float> %a0, %a1
+ %res = fsub <16 x float> %x, %a2
+ ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps_y_wide
+; CHECK: vfnmadd213ps
+; CHECK: vfnmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps_y_wide
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ %x = fmul <16 x float> %a0, %a1
+ %res = fsub <16 x float> %a2, %x
+ ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps_y_wide
+; CHECK: vfnmsub213ps
+; CHECK: vfnmsub213ps
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+ %x = fmul <16 x float> %a0, %a1
+ %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+ %res = fsub <16 x float> %y, %a2
+ ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmadd_pd_y_wide
+; CHECK: vfmadd213pd
+; CHECK: vfmadd213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_pd_y_wide
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ %x = fmul <8 x double> %a0, %a1
+ %res = fadd <8 x double> %x, %a2
+ ret <8 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd_y_wide
+; CHECK: vfmsub213pd
+; CHECK: vfmsub213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd_y_wide
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+ %x = fmul <8 x double> %a0, %a1
+ %res = fsub <8 x double> %x, %a2
+ ret <8 x double> %res
+}
diff --git a/llvm/test/CodeGen/X86/wide-fma-contraction.ll b/llvm/test/CodeGen/X86/wide-fma-contraction.ll
index d93f33ba0e5..7ee0fbaf59c 100644
--- a/llvm/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/llvm/test/CodeGen/X86/wide-fma-contraction.ll
@@ -1,7 +1,9 @@
; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
; CHECK: fmafunc
define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+
; CHECK-NOT: vmulps
; CHECK-NOT: vaddps
; CHECK: vfmaddps
@@ -10,11 +12,17 @@ define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c)
; CHECK: vfmaddps
; CHECK-NOT: vmulps
; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
%ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
ret <16 x float> %ret
}
declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
-
-
-
OpenPOWER on IntegriCloud