author     Sanjay Patel <spatel@rotateright.com>    2019-12-23 09:46:49 -0500
committer  Sanjay Patel <spatel@rotateright.com>    2019-12-23 10:11:45 -0500
commit     8cefc37be5aba4948936c7beb97cde7a68449f1f (patch)
tree       4300a3310408f9a85c58607cf986653664079d6f /llvm/test
parent     0860db966a7d2ab61b26e41426a55189986924b4 (diff)
download   bcm5719-llvm-8cefc37be5aba4948936c7beb97cde7a68449f1f.tar.gz
           bcm5719-llvm-8cefc37be5aba4948936c7beb97cde7a68449f1f.zip
[DAGCombine] visitEXTRACT_SUBVECTOR - 'little to big' extract_subvector(bitcast()) support
This moves the X86 specific transform from rL364407 into DAGCombiner to generically handle 'little to big' cases (for example: extract_subvector(v2i64 bitcast(v16i8))). This allows us to remove both the x86 implementation and the aarch64 bitcast(extract_subvector(bitcast())) combine.

Earlier patches that dealt with regressions initially exposed by this patch:
rG5e5e99c041e4
rG0b38af89e2c0

Patch by: @RKSimon (Simon Pilgrim)

Differential Revision: https://reviews.llvm.org/D63815
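As an illustration of the DAG shape this combine targets, here is a hypothetical reduced function (not one of the tests touched by this patch; the name little_to_big_extract is made up). The shufflevector taking the low half of the bitcast would typically lower to extract_subvector(bitcast(v32i8)) in the SelectionDAG, i.e. a 'little to big' extract that the generic combine can now fold into a bitcast of an extract on the original narrow-element type:

define <2 x i64> @little_to_big_extract(<32 x i8> %x) {
  ; Reinterpret 32 x i8 lanes as 4 x i64 lanes ('little' elements viewed as 'big' elements).
  %cast = bitcast <32 x i8> %x to <4 x i64>
  ; Select the low 128-bit half; in the DAG this is an EXTRACT_SUBVECTOR of the bitcast.
  %lo = shufflevector <4 x i64> %cast, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %lo
}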
Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AArch64/merge-store.ll           20
-rw-r--r--  llvm/test/CodeGen/ARM/combine-vmovdrr.ll            4
-rw-r--r--  llvm/test/CodeGen/X86/avg-mask.ll                  68
-rw-r--r--  llvm/test/CodeGen/X86/madd.ll                      12
-rw-r--r--  llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll    4
-rw-r--r--  llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll      36
6 files changed, 66 insertions, 78 deletions
diff --git a/llvm/test/CodeGen/AArch64/merge-store.ll b/llvm/test/CodeGen/AArch64/merge-store.ll
index cd9564210e9..f0a53384cdd 100644
--- a/llvm/test/CodeGen/AArch64/merge-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store.ll
@@ -42,17 +42,10 @@ define void @blam() {
; the fastness of unaligned accesses was not specified correctly.
define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
-; SPLITTING-LABEL: merge_vec_extract_stores:
-; SPLITTING: // %bb.0:
-; SPLITTING-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; SPLITTING-NEXT: str d0, [x0, #24]
-; SPLITTING-NEXT: str d1, [x0, #32]
-; SPLITTING-NEXT: ret
-;
-; MISALIGNED-LABEL: merge_vec_extract_stores:
-; MISALIGNED: // %bb.0:
-; MISALIGNED-NEXT: stur q0, [x0, #24]
-; MISALIGNED-NEXT: ret
+; CHECK-LABEL: merge_vec_extract_stores:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stur q0, [x0, #24]
+; CHECK-NEXT: ret
%idx0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
%idx1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 4
@@ -62,9 +55,4 @@ define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
store <2 x float> %shuffle0, <2 x float>* %idx0, align 8
store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
ret void
-
-
-; FIXME: Ideally we would like to use a generic target for this test, but this relies
-; on suppressing store pairs.
-
}
diff --git a/llvm/test/CodeGen/ARM/combine-vmovdrr.ll b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll
index 358f7e3a983..01526b37199 100644
--- a/llvm/test/CodeGen/ARM/combine-vmovdrr.ll
+++ b/llvm/test/CodeGen/ARM/combine-vmovdrr.ll
@@ -9,8 +9,8 @@ declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffl
; they are defined on VPRs and used on VPRs.
;
; CHECK-LABEL: motivatingExample:
-; CHECK: vldr [[ARG2_VAL:d[0-9]+]], [r1]
-; CHECK-NEXT: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0]
+; CHECK-NEXT: vldr [[ARG2_VAL:d[0-9]+]], [r1]
; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALlo]], [[ARG1_VALhi]]}, [[ARG2_VAL]]
; CHECK-NEXT: vstr [[RES]], [r1]
; CHECK-NEXT: bx lr
diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll
index a7ce07ab0cd..ebe9e75ab4f 100644
--- a/llvm/test/CodeGen/X86/avg-mask.ll
+++ b/llvm/test/CodeGen/X86/avg-mask.ll
@@ -130,9 +130,9 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512F-NEXT: vpavgb %ymm4, %ymm5, %ymm4
+; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm4
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
@@ -142,14 +142,14 @@ define <64 x i8> @avg_v64i8_mask(<64 x i8> %a, <64 x i8> %b, <64 x i8> %src, i64
; AVX512F-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm5, %xmm5
; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm3, %ymm1
-; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
-; AVX512F-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k2} {z}
-; AVX512F-NEXT: vpmovdb %zmm4, %xmm4
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3
-; AVX512F-NEXT: vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm2, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_mask:
@@ -178,9 +178,9 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
@@ -190,14 +190,14 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
+; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm3, %xmm3
-; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v64i8_maskz:
@@ -330,18 +330,18 @@ define <32 x i16> @avg_v32i16_mask(<32 x i16> %a, <32 x i16> %b, <32 x i16> %src
; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm4
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm5
-; AVX512F-NEXT: vpavgw %ymm4, %ymm5, %ymm4
+; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm4
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
-; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm3, %ymm1
-; AVX512F-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
-; AVX512F-NEXT: vpmovdw %zmm3, %ymm3
-; AVX512F-NEXT: vpblendvb %ymm3, %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpblendvb %ymm1, %ymm4, %ymm2, %ymm1
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_mask:
@@ -366,18 +366,18 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
+; AVX512F-NEXT: vpand %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; AVX512F-NEXT: vpmovdw %zmm2, %ymm2
-; AVX512F-NEXT: vpand %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq
;
; AVX512BWVL-LABEL: avg_v32i16_maskz:
diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll
index 62792ec074a..11756574217 100644
--- a/llvm/test/CodeGen/X86/madd.ll
+++ b/llvm/test/CodeGen/X86/madd.ll
@@ -1975,9 +1975,9 @@ define <16 x i32> @pmaddwd_32(<32 x i16> %A, <32 x i16> %B) {
;
; AVX512F-LABEL: pmaddwd_32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; AVX512F-NEXT: vpmaddwd %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
@@ -2188,9 +2188,9 @@ define <16 x i32> @jumbled_indices16(<32 x i16> %A, <32 x i16> %B) {
;
; AVX512F-LABEL: jumbled_indices16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm3
+; AVX512F-NEXT: vpmaddwd %ymm3, %ymm2, %ymm2
; AVX512F-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512F-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
index cc73f563581..b79fb5c35f3 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll
@@ -6374,9 +6374,9 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
;
; AVX512F-LABEL: truncstore_v32i16_v32i8:
; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX512F-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512F-NEXT: vpmovmskb %ymm1, %eax
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
index 720cabee912..7f00b49b81f 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll
@@ -725,33 +725,33 @@ define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_
define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62(<64 x i8> %x) {
; AVX512F-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
-; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
+; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm2
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
-; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
-; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;