author | Sander de Smalen <sander.desmalen@arm.com> | 2019-05-20 09:54:06 +0000
committer | Sander de Smalen <sander.desmalen@arm.com> | 2019-05-20 09:54:06 +0000
commit | f83cccf917c1c0db45bfaa6728793eb106dda8af (patch)
tree | b2c9399a923624b0b611eb29b7f806162a2e72fd /llvm/test/CodeGen/X86/vector-reduce-fadd.ll
parent | 6e8f1a80cd988db8870aff9c3bc2ca7a20e04104 (diff)
download | bcm5719-llvm-f83cccf917c1c0db45bfaa6728793eb106dda8af.tar.gz bcm5719-llvm-f83cccf917c1c0db45bfaa6728793eb106dda8af.zip
Match types of accumulator and result for llvm.experimental.vector.reduce.fadd/fmul
The scalar start/accumulator value of the fadd and fmul reductions must have
the same type as the result of the reduction and as the element type of the
input vector. Although this was not explicitly specified in the LangRef, it
was taken for granted by the code implementing the reductions. The patch also
updates the LangRef to state this constraint.
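For illustration, a minimal IR sketch of the constraint (the function name @sum_v4f32 below is hypothetical and not part of this patch): the start value, the reduction result, and the element type of the input vector all share one type, and, as the diff below shows, the duplicated scalar type is dropped from the mangled intrinsic name.

declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)

define float @sum_v4f32(<4 x float> %v) {
  ; float start value, float result, <4 x float> input: the scalar types all agree.
  %r = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %v)
  ret float %r
}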
Reviewed By: aemerson, nikic
Differential Revision: https://reviews.llvm.org/D60260
llvm-svn: 361133
Diffstat (limited to 'llvm/test/CodeGen/X86/vector-reduce-fadd.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-reduce-fadd.ll | 64 |
1 file changed, 32 insertions, 32 deletions
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
index cae5f4c1252..480b8061413 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll
@@ -39,7 +39,7 @@ define float @test_v2f32(float %a0, <2 x float> %a1) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float %a0, <2 x float> %a1)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float %a0, <2 x float> %a1)
   ret float %1
 }

@@ -90,7 +90,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float %a0, <4 x float> %a1)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %a1)
   ret float %1
 }

@@ -176,7 +176,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float %a0, <8 x float> %a1)
   ret float %1
 }

@@ -327,7 +327,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
 ; AVX512-NEXT: vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float %a0, <16 x float> %a1)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float %a0, <16 x float> %a1)
   ret float %1
 }

@@ -367,7 +367,7 @@ define float @test_v2f32_zero(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float 0.0, <2 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %a0)
   ret float %1
 }

@@ -422,7 +422,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float 0.0, <4 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float 0.0, <4 x float> %a0)
   ret float %1
 }

@@ -512,7 +512,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float 0.0, <8 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float 0.0, <8 x float> %a0)
   ret float %1
 }

@@ -667,7 +667,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float 0.0, <16 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a0)
   ret float %1
 }

@@ -699,7 +699,7 @@ define float @test_v2f32_undef(<2 x float> %a0) {
 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; AVX512-NEXT: vaddss {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float undef, <2 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float undef, <2 x float> %a0)
   ret float %1
 }

@@ -746,7 +746,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
 ; AVX512-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float undef, <4 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float undef, <4 x float> %a0)
   ret float %1
 }

@@ -828,7 +828,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float undef, <8 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float undef, <8 x float> %a0)
   ret float %1
 }

@@ -975,7 +975,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
 ; AVX512-NEXT: vaddss %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float undef, <16 x float> %a0)
+  %1 = call float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float undef, <16 x float> %a0)
   ret float %1
 }

@@ -1004,7 +1004,7 @@ define double @test_v2f64(double %a0, <2 x double> %a1) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double %a0, <2 x double> %a1)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double %a0, <2 x double> %a1)
   ret double %1
 }

@@ -1042,7 +1042,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double %a0, <4 x double> %a1)
   ret double %1
 }

@@ -1101,7 +1101,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double %a0, <8 x double> %a1)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double %a0, <8 x double> %a1)
   ret double %1
 }

@@ -1202,7 +1202,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double %a0, <16 x double> %a1)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double %a0, <16 x double> %a1)
   ret double %1
 }

@@ -1234,7 +1234,7 @@ define double @test_v2f64_zero(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double 0.0, <2 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double 0.0, <2 x double> %a0)
   ret double %1
 }

@@ -1275,7 +1275,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double 0.0, <4 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double 0.0, <4 x double> %a0)
   ret double %1
 }

@@ -1337,7 +1337,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double 0.0, <8 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double 0.0, <8 x double> %a0)
   ret double %1
 }

@@ -1440,7 +1440,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double 0.0, <16 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double 0.0, <16 x double> %a0)
   ret double %1
 }

@@ -1466,7 +1466,7 @@ define double @test_v2f64_undef(<2 x double> %a0) {
 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
 ; AVX512-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double undef, <2 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double undef, <2 x double> %a0)
   ret double %1
 }

@@ -1501,7 +1501,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double undef, <4 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double undef, <4 x double> %a0)
   ret double %1
 }

@@ -1557,7 +1557,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double undef, <8 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double undef, <8 x double> %a0)
   ret double %1
 }

@@ -1654,16 +1654,16 @@ define double @test_v16f64_undef(<16 x double> %a0) {
 ; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
-  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double undef, <16 x double> %a0)
+  %1 = call double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double undef, <16 x double> %a0)
   ret double %1
 }

-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v2f32(float, <2 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float, <8 x float>)
-declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v16f32(float, <16 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
+declare float @llvm.experimental.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v2f64(double, <2 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double, <4 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v8f64(double, <8 x double>)
-declare double @llvm.experimental.vector.reduce.fadd.f64.f64.v16f64(double, <16 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v4f64(double, <4 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v8f64(double, <8 x double>)
+declare double @llvm.experimental.vector.reduce.fadd.f64.v16f64(double, <16 x double>)