diff options
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-schedule.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-schedule.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 32 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll | 48 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 51 |
7 files changed, 50 insertions, 103 deletions
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index a3e6a18fbc9..44d13db65c9 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -3447,8 +3447,8 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou ; ; SKX-LABEL: test_perm2f128: ; SKX: # BB#0: -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 8febe046d81..cec8ca94409 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -2531,8 +2531,8 @@ define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; ; SKX-LABEL: test_perm2i128: ; SKX: # BB#0: -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] -; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] +; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 9098ca30897..35fecf8955c 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2729,8 +2729,8 @@ define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1,2,3],ymm1[4,5,6,7] ; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x23,0xd1,0x16] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] -; CHECK-NEXT: vshuff32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x23,0xc1,0x16] -; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30] +; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0] ; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe4,0x58,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] @@ -2752,7 +2752,7 @@ define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 ; CHECK-NEXT: ## ymm3 {%k1} {z} = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x23,0xd1,0x16] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] -; CHECK-NEXT: vshuff64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x23,0xc1,0x16] +; CHECK-NEXT: vperm2f128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x06,0xc1,0x30] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0] ; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0] @@ -2773,8 +2773,8 @@ define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x43,0xd1,0x16] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1,2,3],ymm1[4,5,6,7] -; CHECK-NEXT: vshufi32x4 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x43,0xc1,0x16] -; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] +; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30] +; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4) @@ -2791,7 +2791,7 @@ define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64 ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] ; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x43,0xd1,0x16] ; CHECK-NEXT: ## ymm2 {%k1} = ymm0[0,1],ymm1[2,3] -; CHECK-NEXT: vshufi64x2 $22, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x28,0x43,0xc1,0x16] +; CHECK-NEXT: vperm2i128 $48, %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x46,0xc1,0x30] ; CHECK-NEXT: ## ymm0 = ymm0[0,1],ymm1[2,3] ; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index cbc190d0db3..7ef5bee5420 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -3975,7 +3975,7 @@ define <16 x i16> @concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_b ; ; AVX512VL-LABEL: concat_v16i16_8_9_10_11_12_13_14_15_24_25_26_27_28_29_30_31_bc: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX512VL-NEXT: retq %ahi = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %bhi = shufflevector <16 x i16> %b, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index c5c2312b161..3c69f6160dd 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -1682,17 +1682,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: -; AVX2: # BB#0: -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_32_32_32_32_32_32_32_32_40_40_40_40_40_40_40_40: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40, i32 40> ret <32 x i8> %shuffle } @@ -1708,17 +1702,11 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_ ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: -; AVX2: # BB#0: -; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_24_24_24_24_24_24_24_24_48_48_48_48_48_48_48_48_56_56_56_56_56_56_56_56: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,24,24,24,24,24,24,24,24] +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56> ret <32 x i8> %shuffle } diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 6e0e80b4016..56567c7e794 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -418,45 +418,21 @@ define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) { } define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_3254: -; AVX1: # BB#0: -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_3254: -; AVX2: # BB#0: -; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_3254: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] -; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_3254: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4> ret <4 x double> %shuffle } define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) { -; AVX1-LABEL: shuffle_v4f64_3276: -; AVX1: # BB#0: -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4f64_3276: -; AVX2: # BB#0: -; AVX2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4f64_3276: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX512VL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4f64_3276: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2] +; ALL-NEXT: retq %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6> ret <4 x double> %shuffle } @@ -1077,7 +1053,7 @@ define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) { ; ; AVX512VL-LABEL: shuffle_v4i64_3254: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4> @@ -1099,7 +1075,7 @@ define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) { ; ; AVX512VL-LABEL: shuffle_v4i64_3276: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5] ; AVX512VL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index 38891b46577..e4234c05845 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -768,33 +768,21 @@ define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) { } define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) { -; AVX1OR2-LABEL: shuffle_v8f32_7654fedc: -; AVX1OR2: # BB#0: -; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] -; AVX1OR2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v8f32_7654fedc: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v8f32_7654fedc: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> ret <8 x float> %shuffle } define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) { -; AVX1OR2-LABEL: shuffle_v8f32_fedc7654: -; AVX1OR2: # BB#0: -; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] -; AVX1OR2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] -; AVX1OR2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v8f32_fedc7654: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] -; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v8f32_fedc7654: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] +; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] +; ALL-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> ret <8 x float> %shuffle } @@ -1809,7 +1797,7 @@ define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { ; ; AVX512VL-LABEL: shuffle_v8i32_7654fedc: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> @@ -1825,7 +1813,7 @@ define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { ; ; AVX512VL-LABEL: shuffle_v8i32_fedc7654: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3] ; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> @@ -2208,7 +2196,7 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { ; ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc: ; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshufi64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; AVX512VL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] ; AVX512VL-NEXT: retq %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15> @@ -2220,15 +2208,10 @@ define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { } define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) { -; AVX1OR2-LABEL: concat_v8f32_4567CDEF_bc: -; AVX1OR2: # BB#0: -; AVX1OR2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX1OR2-NEXT: retq -; -; AVX512VL-LABEL: concat_v8f32_4567CDEF_bc: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vshuff64x2 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] -; AVX512VL-NEXT: retq +; ALL-LABEL: concat_v8f32_4567CDEF_bc: +; ALL: # BB#0: +; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] +; ALL-NEXT: retq %a0 = bitcast <8 x float> %f0 to <4 x i64> %a1 = bitcast <8 x float> %f1 to <8 x i32> %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3> |

