diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-06-11 13:43:21 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-06-11 13:43:21 +0000 |
| commit | 504fba5c8a56e6532a715ea6e7cb188ab91164a5 (patch) | |
| tree | 6b54c4d6c3c69764934e00ae1dcfa6649a4a69f8 /llvm/test | |
| parent | c1cb881d5576d4df6e668a3c598c27c233752f45 (diff) | |
| download | bcm5719-llvm-504fba5c8a56e6532a715ea6e7cb188ab91164a5.tar.gz bcm5719-llvm-504fba5c8a56e6532a715ea6e7cb188ab91164a5.zip | |
[AVX512] Lower v8i64 and v16i32 to pshufd when possible.
llvm-svn: 272473
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/sad.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 1 |
3 files changed, 33 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 98ba509bfde..91a4f16e22d 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512F @@ -76,9 +76,9 @@ define i32 @sad_16i8() nounwind { ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7] +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vmovd %xmm0, %eax ; AVX512F-NEXT: retq @@ -101,9 +101,9 @@ define i32 @sad_16i8() nounwind { ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: retq @@ -323,9 +323,9 @@ define i32 @sad_32i8() nounwind { ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7] +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vmovd %xmm0, %eax ; AVX512F-NEXT: retq @@ -350,9 +350,9 @@ define i32 @sad_32i8() nounwind { ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: retq @@ -805,9 +805,9 @@ define i32 @sad_avx64i8() nounwind { ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpunpckhqdq {{.*#+}} zmm1 = zmm0[1,1,3,3,5,5,7,7] +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512F-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512F-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vmovd %xmm0, %eax ; AVX512F-NEXT: retq @@ -833,9 +833,9 @@ define i32 @sad_avx64i8() nounwind { ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm0[2,3,0,1,0,1,0,1] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrldq {{.*#+}} zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[2,3,2,3,6,7,6,7,10,11,10,11,14,15,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufd {{.*#+}} zmm1 = zmm0[1,1,2,3,5,5,6,7,9,9,10,11,13,13,14,15] ; AVX512BW-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovd %xmm0, %eax ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index 12a913c74f8..c3d3f7eaf4d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW @@ -225,3 +225,21 @@ define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) { %v2 = shufflevector <16 x float> %v_a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ret <8 x float> %v2 } + +define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) { +; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12: +; ALL: # BB#0: +; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12] +; ALL-NEXT: retq + %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12> + ret <16 x i32> %c +} + +define <16 x i32> @shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12(<16 x i32> %a, <16 x i32> %b) { +; ALL-LABEL: shuffle_v16i16_3_3_0_0_7_7_4_4_11_11_8_8_15_15_12_12: +; ALL: # BB#0: +; ALL-NEXT: vpshufd {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13] +; ALL-NEXT: retq + %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13> + ret <16 x i32> %c +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index 067c3f1f3ea..a4455e76d5b 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -107,3 +107,4 @@ define <32 x i16> @shuffle_v32i16_1_1_0_0_5_5_4_4_9_9_11_11_13_13_12_12_17_17_19 %c = shufflevector <32 x i16> %a, <32 x i16> zeroinitializer, <32 x i32> <i32 1, i32 1, i32 0, i32 0, i32 5, i32 5, i32 4, i32 4, i32 9, i32 9, i32 8, i32 8, i32 13, i32 13, i32 12, i32 12, i32 17, i32 17, i32 16, i32 16, i32 21, i32 21, i32 20, i32 20, i32 25, i32 25, i32 24, i32 24, i32 29, i32 29, i32 28, i32 28> ret <32 x i16> %c } + |

