path: root/llvm/test
author    Sander de Smalen <sander.desmalen@arm.com>  2019-05-20 09:54:06 +0000
committer Sander de Smalen <sander.desmalen@arm.com>  2019-05-20 09:54:06 +0000
commit    f83cccf917c1c0db45bfaa6728793eb106dda8af (patch)
tree      b2c9399a923624b0b611eb29b7f806162a2e72fd /llvm/test
parent    6e8f1a80cd988db8870aff9c3bc2ca7a20e04104 (diff)
Match types of accumulator and result for llvm.experimental.vector.reduce.fadd/fmul
The scalar start/accumulator value of the fadd and fmul reductions should match the result type of the reduction, as well as the vector element type of the input vector. Although this was not explicitly specified in the LangRef, it was taken for granted in code implementing the reductions. The patch also fixes the LangRef by adding this constraint.

Reviewed By: aemerson, nikic

Differential Revision: https://reviews.llvm.org/D60260

llvm-svn: 361133
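For illustration, a minimal sketch of a call that is well-typed under the new rule (the function name @fadd_valid is made up for this example; the intrinsic declaration matches the updated signatures in the tests below). The scalar start value, the reduction result, and the vector element type are all float:

define float @fadd_valid(float %acc, <4 x float> %in) {
  ; %acc is float, matching both the f32 result and the <4 x float> element type.
  %res = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %acc, <4 x float> %in)
  ret float %res
}

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)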
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/Assembler/invalid-vecreduce.ll          34
-rw-r--r--  llvm/test/CodeGen/AArch64/vecreduce-fadd.ll       32
-rw-r--r--  llvm/test/CodeGen/X86/haddsub.ll                   8
-rw-r--r--  llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll  64
-rw-r--r--  llvm/test/CodeGen/X86/vector-reduce-fadd.ll       64
-rw-r--r--  llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll  64
-rw-r--r--  llvm/test/CodeGen/X86/vector-reduce-fmul.ll       64
7 files changed, 182 insertions(+), 148 deletions(-)
diff --git a/llvm/test/Assembler/invalid-vecreduce.ll b/llvm/test/Assembler/invalid-vecreduce.ll
new file mode 100644
index 00000000000..d1ca5932a02
--- /dev/null
+++ b/llvm/test/Assembler/invalid-vecreduce.ll
@@ -0,0 +1,34 @@
+; RUN: not opt -S < %s 2>&1 | FileCheck %s
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: float (double, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64
+define float @fadd_invalid_scalar_res(double %acc, <2 x double> %in) {
+ %res = call float @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in)
+ ret float %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: double (float, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64
+define double @fadd_invalid_scalar_start(float %acc, <2 x double> %in) {
+ %res = call double @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in)
+ ret double %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: <2 x double> (double, <2 x double>)* @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64
+define <2 x double> @fadd_invalid_vector_res(double %acc, <2 x double> %in) {
+ %res = call <2 x double> @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in)
+ ret <2 x double> %res
+}
+
+; CHECK: Intrinsic has incorrect argument type!
+; CHECK-NEXT: double (<2 x double>, <2 x double>)* @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64
+define double @fadd_invalid_vector_start(<2 x double> %in, <2 x double> %acc) {
+ %res = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in)
+ ret double %res
+}
+
+declare float @llvm.experimental.vector.reduce.fadd.f32.f64.v2f64(double %acc, <2 x double> %in)
+declare double @llvm.experimental.vector.reduce.fadd.f64.f32.v2f64(float %acc, <2 x double> %in)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64.v2f64(<2 x double> %acc, <2 x double> %in)
+declare <2 x double> @llvm.experimental.vector.reduce.fadd.v2f64.f64.v2f64(double %acc, <2 x double> %in)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index b4576e7aa65..c0f25e505d2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -5,7 +5,7 @@ define float @add_HalfS(<2 x float> %bin.rdx) {
; CHECK-LABEL: add_HalfS:
; CHECK: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(<2 x float> undef, <2 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %bin.rdx)
ret float %r
}
@@ -23,7 +23,7 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v4f16(<4 x half> undef, <4 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v4f16(half undef, <4 x half> %bin.rdx)
ret half %r
}
@@ -45,7 +45,7 @@ define half @add_H(<8 x half> %bin.rdx) {
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v8f16(<8 x half> undef, <8 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half undef, <8 x half> %bin.rdx)
ret half %r
}
@@ -55,7 +55,7 @@ define float @add_S(<4 x float> %bin.rdx) {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(<4 x float> undef, <4 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %bin.rdx)
ret float %r
}
@@ -63,7 +63,7 @@ define double @add_D(<2 x double> %bin.rdx) {
; CHECK-LABEL: add_D:
; CHECK: faddp d0, v0.2d
; CHECK-NEXT: ret
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(<2 x double> undef, <2 x double> %bin.rdx)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %bin.rdx)
ret double %r
}
@@ -84,7 +84,7 @@ define half @add_2H(<16 x half> %bin.rdx) {
; CHECKNOFP16-NOT: fadd h{{[0-9]+}}
; CHECKNOFP16-NOT: fadd v{{[0-9]+}}.{{[0-9]}}h
; CHECKNOFP16: ret
- %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v16f16(<16 x half> undef, <16 x half> %bin.rdx)
+ %r = call fast half @llvm.experimental.vector.reduce.fadd.f16.v16f16(half undef, <16 x half> %bin.rdx)
ret half %r
}
@@ -95,7 +95,7 @@ define float @add_2S(<8 x float> %bin.rdx) {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(<8 x float> undef, <8 x float> %bin.rdx)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %bin.rdx)
ret float %r
}
@@ -104,16 +104,16 @@ define double @add_2D(<4 x double> %bin.rdx) {
; CHECK: fadd v0.2d, v0.2d, v1.2d
; CHECK-NEXT: faddp d0, v0.2d
; CHECK-NEXT: ret
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(<4 x double> undef, <4 x double> %bin.rdx)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %bin.rdx)
ret double %r
}
; Function Attrs: nounwind readnone
-declare half @llvm.experimental.vector.reduce.fadd.f16.v4f16(<4 x half>, <4 x half>)
-declare half @llvm.experimental.vector.reduce.fadd.f16.v8f16(<8 x half>, <8 x half>)
-declare half @llvm.experimental.vector.reduce.fadd.f16.v16f16(<16 x half>, <16 x half>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(<2 x float>, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(<4 x float>, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(<8 x float>, <8 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(<2 x double>, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(<4 x double>, <4 x double>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
+declare half @llvm.experimental.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll
index 7baa7513fb1..f7d70d5d7da 100644
--- a/llvm/test/CodeGen/X86/haddsub.ll
+++ b/llvm/test/CodeGen/X86/haddsub.ll
@@ -1628,8 +1628,8 @@ define float @extract_extract01_v4f32_fadd_f32_uses3(<4 x float> %x, float* %p1,
; Repeat tests from general reductions to verify output for hoppy targets:
; PR38971: https://bugs.llvm.org/show_bug.cgi?id=38971
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) {
; SSE3-SLOW-LABEL: fadd_reduce_v8f32:
@@ -1671,7 +1671,7 @@ define float @fadd_reduce_v8f32(float %a0, <8 x float> %a1) {
; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
- %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %r = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %r
}
@@ -1707,7 +1707,7 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) {
; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
- %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %r = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %r
}
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
index 54513af0804..3f72450d977 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll
@@ -47,7 +47,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
@@ -101,7 +101,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
@@ -169,7 +169,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
@@ -246,7 +246,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
@@ -291,7 +291,7 @@ define float @test_v2f32_zero(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
ret float %1
}
@@ -346,7 +346,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
ret float %1
}
@@ -415,7 +415,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
ret float %1
}
@@ -493,7 +493,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
ret float %1
}
@@ -538,7 +538,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
@@ -593,7 +593,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
@@ -662,7 +662,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
@@ -740,7 +740,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
@@ -778,7 +778,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
@@ -825,7 +825,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
@@ -879,7 +879,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
@@ -944,7 +944,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
@@ -983,7 +983,7 @@ define double @test_v2f64_zero(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
ret double %1
}
@@ -1031,7 +1031,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
ret double %1
}
@@ -1086,7 +1086,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
ret double %1
}
@@ -1151,7 +1151,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
ret double %1
}
@@ -1190,7 +1190,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
@@ -1238,7 +1238,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
@@ -1293,7 +1293,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
@@ -1358,16 +1358,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
index cae5f4c1252..480b8061413 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
@@ -39,7 +39,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
@@ -90,7 +90,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
@@ -176,7 +176,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
@@ -327,7 +327,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
@@ -367,7 +367,7 @@ define float @test_v2f32_zero(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
ret float %1
}
@@ -422,7 +422,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
ret float %1
}
@@ -512,7 +512,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
ret float %1
}
@@ -667,7 +667,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
ret float %1
}
@@ -699,7 +699,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
@@ -746,7 +746,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
@@ -828,7 +828,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
@@ -975,7 +975,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
@@ -1004,7 +1004,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
@@ -1042,7 +1042,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
@@ -1101,7 +1101,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
@@ -1202,7 +1202,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
@@ -1234,7 +1234,7 @@ define double @test_v2f64_zero(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
ret double %1
}
@@ -1275,7 +1275,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
ret double %1
}
@@ -1337,7 +1337,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
ret double %1
}
@@ -1440,7 +1440,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
ret double %1
}
@@ -1466,7 +1466,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
@@ -1501,7 +1501,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
@@ -1557,7 +1557,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
@@ -1654,16 +1654,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
index 3cd94151aca..8011ee36a59 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
@@ -35,7 +35,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
@@ -74,7 +74,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
@@ -121,7 +121,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
@@ -209,7 +209,7 @@ define float @test_v2f32_zero(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
ret float %1
}
@@ -249,7 +249,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
ret float %1
}
@@ -297,7 +297,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
ret float %1
}
@@ -352,7 +352,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
ret float %1
}
@@ -386,7 +386,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
@@ -426,7 +426,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
@@ -474,7 +474,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
@@ -529,7 +529,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call fast float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
@@ -556,7 +556,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
@@ -586,7 +586,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
@@ -621,7 +621,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
@@ -663,7 +663,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
@@ -691,7 +691,7 @@ define double @test_v2f64_zero(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
ret double %1
}
@@ -722,7 +722,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
ret double %1
}
@@ -758,7 +758,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
ret double %1
}
@@ -800,7 +800,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
ret double %1
}
@@ -828,7 +828,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
@@ -859,7 +859,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
@@ -895,7 +895,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
@@ -937,16 +937,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call fast double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double, <16 x double>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
index a4b255ce525..8c885b4aad5 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -38,7 +38,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float %a0, <2 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float %a0, <2 x float> %a1)
ret float %1
}
@@ -89,7 +89,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float %a0, <4 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float %a0, <4 x float> %a1)
ret float %1
}
@@ -175,7 +175,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float %a0, <8 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float %a0, <8 x float> %a1)
ret float %1
}
@@ -326,7 +326,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float %a0, <16 x float> %a1)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float %a0, <16 x float> %a1)
ret float %1
}
@@ -360,7 +360,7 @@ define float @test_v2f32_one(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float 1.0, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %a0)
ret float %1
}
@@ -407,7 +407,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float 1.0, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a0)
ret float %1
}
@@ -489,7 +489,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float 1.0, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %a0)
ret float %1
}
@@ -636,7 +636,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float 1.0, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a0)
ret float %1
}
@@ -668,7 +668,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float undef, <2 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float undef, <2 x float> %a0)
ret float %1
}
@@ -715,7 +715,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float undef, <4 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float undef, <4 x float> %a0)
ret float %1
}
@@ -797,7 +797,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float undef, <8 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float undef, <8 x float> %a0)
ret float %1
}
@@ -944,7 +944,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float undef, <16 x float> %a0)
+ %1 = call float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float undef, <16 x float> %a0)
ret float %1
}
@@ -973,7 +973,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double %a0, <2 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double %a0, <2 x double> %a1)
ret double %1
}
@@ -1011,7 +1011,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double %a0, <4 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double %a0, <4 x double> %a1)
ret double %1
}
@@ -1070,7 +1070,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double %a0, <8 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double %a0, <8 x double> %a1)
ret double %1
}
@@ -1171,7 +1171,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double %a0, <16 x double> %a1)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double %a0, <16 x double> %a1)
ret double %1
}
@@ -1199,7 +1199,7 @@ define double @test_v2f64_one(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double 1.0, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a0)
ret double %1
}
@@ -1236,7 +1236,7 @@ define double @test_v4f64_one(<4 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double 1.0, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %a0)
ret double %1
}
@@ -1294,7 +1294,7 @@ define double @test_v8f64_one(<8 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double 1.0, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double 1.0, <8 x double> %a0)
ret double %1
}
@@ -1392,7 +1392,7 @@ define double @test_v16f64_one(<16 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double 1.0, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double 1.0, <16 x double> %a0)
ret double %1
}
@@ -1418,7 +1418,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vmulsd {{.*}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double undef, <2 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double undef, <2 x double> %a0)
ret double %1
}
@@ -1453,7 +1453,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double undef, <4 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double undef, <4 x double> %a0)
ret double %1
}
@@ -1509,7 +1509,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double undef, <8 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double undef, <8 x double> %a0)
ret double %1
}
@@ -1606,16 +1606,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
- %1 = call double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double undef, <16 x double> %a0)
+ %1 = call double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double undef, <16 x double> %a0)
ret double %1
}
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fmul.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fmul.f64.v16f64(double, <16 x double>)