diff options
| -rw-r--r-- | llvm/test/CodeGen/X86/haddsub.ll | 82 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/phaddsub.ll | 115 |
2 files changed, 79 insertions, 118 deletions
diff --git a/llvm/test/CodeGen/X86/haddsub.ll b/llvm/test/CodeGen/X86/haddsub.ll index 1f6b3b1b947..bbb08f0d652 100644 --- a/llvm/test/CodeGen/X86/haddsub.ll +++ b/llvm/test/CodeGen/X86/haddsub.ll @@ -1,10 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE3,SSE3-SLOW ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse3,fast-hops | FileCheck %s --check-prefixes=SSE3,SSE3-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1-FAST -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512-FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX1,AVX1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX1,AVX1-FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX2,AVX2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX2,AVX2-FAST +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX-SLOW,AVX512,AVX512-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,fast-hops | FileCheck %s --check-prefixes=AVX,AVX-FAST,AVX512,AVX512-FAST define <2 x double> @haddpd1(<2 x double> %x, <2 x double> %y) { ; SSE3-LABEL: haddpd1: @@ -1438,3 +1440,75 @@ define double @fadd_reduce_v4f64(double %a0, <4 x double> %a1) { ret double %r } +define float @PR39936_v8f32(<8 x float>) { +; SSSE3-SLOW-LABEL: PR39936_v8f32: +; SSSE3-SLOW: # %bb.0: +; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1 +; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] +; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: retq +; +; SSSE3-FAST-LABEL: PR39936_v8f32: +; SSSE3-FAST: # %bb.0: +; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0 +; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSSE3-FAST-NEXT: retq +; +; SSE3-SLOW-LABEL: PR39936_v8f32: +; SSE3-SLOW: # %bb.0: +; SSE3-SLOW-NEXT: haddps %xmm1, %xmm0 +; SSE3-SLOW-NEXT: movaps %xmm0, %xmm1 +; SSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] +; SSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3] +; SSE3-SLOW-NEXT: addps %xmm1, %xmm0 +; SSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; SSE3-SLOW-NEXT: addss %xmm1, %xmm0 +; SSE3-SLOW-NEXT: retq +; +; SSE3-FAST-LABEL: PR39936_v8f32: +; SSE3-FAST: # %bb.0: +; SSE3-FAST-NEXT: haddps %xmm1, %xmm0 +; SSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSE3-FAST-NEXT: haddps %xmm0, %xmm0 +; SSE3-FAST-NEXT: retq +; +; AVX-SLOW-LABEL: PR39936_v8f32: +; AVX-SLOW: # %bb.0: +; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3] +; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3] +; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] +; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; AVX-SLOW-NEXT: vzeroupper +; AVX-SLOW-NEXT: retq +; +; AVX-FAST-LABEL: PR39936_v8f32: +; AVX-FAST: # %bb.0: +; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] +; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0 +; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 +; AVX-FAST-NEXT: vzeroupper +; AVX-FAST-NEXT: retq + %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> + %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> + %4 = fadd <8 x float> %2, %3 + %5 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %6 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %7 = fadd <8 x float> %5, %6 + %8 = shufflevector <8 x float> %7, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %9 = fadd <8 x float> %7, %8 + %10 = extractelement <8 x float> %9, i32 0 + ret float %10 +} diff --git a/llvm/test/CodeGen/X86/phaddsub.ll b/llvm/test/CodeGen/X86/phaddsub.ll index 37b70376462..697c8d71c0f 100644 --- a/llvm/test/CodeGen/X86/phaddsub.ll +++ b/llvm/test/CodeGen/X86/phaddsub.ll @@ -782,66 +782,7 @@ define <8 x i16> @phaddw_single_source6(<8 x i16> %x) { ret <8 x i16> %shuffle2 } -; PR39921 -define i32 @pairwise_reduction_8i32(<8 x i32> %rdx) { -; SSSE3-SLOW-LABEL: pairwise_reduction_8i32: -; SSSE3-SLOW: # %bb.0: -; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] -; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1 -; SSSE3-SLOW-NEXT: movd %xmm1, %eax -; SSSE3-SLOW-NEXT: retq -; -; SSSE3-FAST-LABEL: pairwise_reduction_8i32: -; SSSE3-FAST: # %bb.0: -; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm0 -; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 -; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 -; SSSE3-FAST-NEXT: movd %xmm0, %eax -; SSSE3-FAST-NEXT: retq -; -; AVX-SLOW-LABEL: pairwise_reduction_8i32: -; AVX-SLOW: # %bb.0: -; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] -; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm2, %xmm0 -; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,2,2,3] -; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] -; AVX-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0 -; AVX-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; AVX-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 -; AVX-SLOW-NEXT: vmovd %xmm0, %eax -; AVX-SLOW-NEXT: vzeroupper -; AVX-SLOW-NEXT: retq -; -; AVX-FAST-LABEL: pairwise_reduction_8i32: -; AVX-FAST: # %bb.0: -; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] -; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX-FAST-NEXT: vpaddd %xmm0, %xmm2, %xmm0 -; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 -; AVX-FAST-NEXT: vmovd %xmm0, %eax -; AVX-FAST-NEXT: vzeroupper -; AVX-FAST-NEXT: retq - %rdx.shuf.0.0 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6,i32 undef, i32 undef, i32 undef, i32 undef> - %rdx.shuf.0.1 = shufflevector <8 x i32> %rdx, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7,i32 undef, i32 undef, i32 undef, i32 undef> - %bin.rdx = add <8 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 - %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef,<8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 - %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef,<8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 - %r = extractelement <8 x i32> %bin.rdx9, i32 0 - ret i32 %r -} - +; PR39921 + PR39936 define i32 @PR39936_v8i32(<8 x i32>) { ; SSSE3-SLOW-LABEL: PR39936_v8i32: ; SSSE3-SLOW: # %bb.0: @@ -900,57 +841,3 @@ define i32 @PR39936_v8i32(<8 x i32>) { ret i32 %10 } -define float @PR39936_v8f32(<8 x float>) { -; SSSE3-SLOW-LABEL: PR39936_v8f32: -; SSSE3-SLOW: # %bb.0: -; SSSE3-SLOW-NEXT: haddps %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: movaps %xmm0, %xmm1 -; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,3] -; SSSE3-SLOW-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3,2,3] -; SSSE3-SLOW-NEXT: addps %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSSE3-SLOW-NEXT: addss %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: retq -; -; SSSE3-FAST-LABEL: PR39936_v8f32: -; SSSE3-FAST: # %bb.0: -; SSSE3-FAST-NEXT: haddps %xmm1, %xmm0 -; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 -; SSSE3-FAST-NEXT: haddps %xmm0, %xmm0 -; SSSE3-FAST-NEXT: retq -; -; AVX-SLOW-LABEL: PR39936_v8f32: -; AVX-SLOW: # %bb.0: -; AVX-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] -; AVX-SLOW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX-SLOW-NEXT: vaddps %xmm0, %xmm2, %xmm0 -; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,2,2,3] -; AVX-SLOW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,3,2,3] -; AVX-SLOW-NEXT: vaddps %xmm0, %xmm1, %xmm0 -; AVX-SLOW-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; AVX-SLOW-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-SLOW-NEXT: vzeroupper -; AVX-SLOW-NEXT: retq -; -; AVX-FAST-LABEL: PR39936_v8f32: -; AVX-FAST: # %bb.0: -; AVX-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-FAST-NEXT: vshufps {{.*#+}} xmm2 = xmm0[0,2],xmm1[0,2] -; AVX-FAST-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX-FAST-NEXT: vaddps %xmm0, %xmm2, %xmm0 -; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 -; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0 -; AVX-FAST-NEXT: vzeroupper -; AVX-FAST-NEXT: retq - %2 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef> - %3 = shufflevector <8 x float> %0, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 undef, i32 undef> - %4 = fadd <8 x float> %2, %3 - %5 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %6 = shufflevector <8 x float> %4, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %7 = fadd <8 x float> %5, %6 - %8 = shufflevector <8 x float> %7, <8 x float> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> - %9 = fadd <8 x float> %7, %8 - %10 = extractelement <8 x float> %9, i32 0 - ret float %10 -} |

