-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp               36
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll    22
-rw-r--r--  llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll  20
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll       10
-rw-r--r--  llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll        48
5 files changed, 71 insertions, 65 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4f4f8c9b47b..3351f1321c8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12606,8 +12606,7 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
{0, 1, 2, 3, 0, 1, 2, 3});
if (OnlyUsesV1 || isShuffleEquivalent(V1, V2, Mask,
{0, 1, 2, 3, 8, 9, 10, 11})) {
- MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements() / 2);
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
DAG.getIntPtrConstant(0, DL));
SDValue HiV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
@@ -12616,8 +12615,39 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoV, HiV);
}
- // Try to lower to to vshuf64x2/vshuf32x4.
assert(WidenedMask.size() == 4);
+
+ // See if this is an insertion of the lower 128 bits of V2 into V1.
+ bool IsInsert = true;
+ int V2Index = -1;
+ for (int i = 0; i < 4; ++i) {
+ assert(WidenedMask[i] >= -1);
+ if (WidenedMask[i] < 0)
+ continue;
+
+ // Make sure all V1 subvectors are in place.
+ if (WidenedMask[i] < 4) {
+ if (WidenedMask[i] != i) {
+ IsInsert = false;
+ break;
+ }
+ } else {
+ // Make sure we only have a single V2 index and it's the lowest 128 bits.
+ if (V2Index >= 0 || WidenedMask[i] != 4) {
+ IsInsert = false;
+ break;
+ }
+ V2Index = i;
+ }
+ }
+ if (IsInsert && V2Index >= 0) {
+ MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
+ SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
+ DAG.getIntPtrConstant(0, DL));
+ return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
+ }
+
+ // Try to lower to vshuf64x2/vshuf32x4.
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
unsigned PermMask = 0;
// Ensure elements came from the same Op.
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
index ad5656bbdc8..3c649e18bc3 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
@@ -2977,13 +2977,12 @@ define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; CHECK-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vblendmps %zmm3, %zmm2, %zmm0 {%k1}
-; CHECK-NEXT: vmovaps %zmm3, %zmm1 {%k1} {z}
-; CHECK-NEXT: vaddps %zmm3, %zmm0, %zmm0
-; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
%res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
@@ -2999,13 +2998,12 @@ define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; CHECK-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vpblendmd %zmm3, %zmm2, %zmm0 {%k1}
-; CHECK-NEXT: vmovdqa32 %zmm3, %zmm1 {%k1} {z}
-; CHECK-NEXT: vpaddd %zmm3, %zmm0, %zmm0
-; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
%res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
index 9403a69e88e..f4cf22c5ed3 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
@@ -79,14 +79,12 @@ define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; CHECK-NEXT: vmovapd %zmm0, %zmm4
-; CHECK-NEXT: vpermt2pd %zmm1, %zmm3, %zmm4
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vblendmpd %zmm4, %zmm2, %zmm2 {%k1}
-; CHECK-NEXT: vpermt2pd %zmm1, %zmm3, %zmm0 {%k1} {z}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
-; CHECK-NEXT: vaddpd %zmm4, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm0, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
%res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
@@ -122,14 +120,12 @@ define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i6
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %XMM1<def> %XMM1<kill> %ZMM1<def>
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; CHECK-NEXT: vmovdqa64 %zmm0, %zmm4
-; CHECK-NEXT: vpermt2q %zmm1, %zmm3, %zmm4
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vpblendmq %zmm4, %zmm2, %zmm2 {%k1}
-; CHECK-NEXT: vpermt2q %zmm1, %zmm3, %zmm0 {%k1} {z}
-; CHECK-NEXT: vpaddq %zmm4, %zmm0, %zmm0
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
; CHECK-NEXT: retq
%res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
%res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index d11c789dfaf..b951bf1c97e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -512,10 +512,9 @@ define <16 x float> @mask_shuffle_v16f32_00_01_02_03_04_05_06_07_16_17_18_19_20_
define <16 x float> @mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x float> %a, <16 x float> %b, <16 x float> %passthru, i16 %mask) {
; ALL-LABEL: mask_shuffle_v16f32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
; ALL: # BB#0:
-; ALL-NEXT: vmovapd {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; ALL-NEXT: vpermi2pd %zmm1, %zmm0, %zmm3
; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vblendmps %zmm3, %zmm2, %zmm0 {%k1}
+; ALL-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; ALL-NEXT: vmovaps %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%mask.cast = bitcast i16 %mask to <16 x i1>
@@ -539,10 +538,9 @@ define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_04_05_06_07_16_17_18_19_20_21
define <16 x i32> @mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passthru, i16 %mask) {
; ALL-LABEL: mask_shuffle_v16i32_00_01_02_03_16_17_18_19_08_09_10_11_12_13_14_15:
; ALL: # BB#0:
-; ALL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,9,4,5,6,7]
-; ALL-NEXT: vpermi2q %zmm1, %zmm0, %zmm3
; ALL-NEXT: kmovw %edi, %k1
-; ALL-NEXT: vpblendmd %zmm3, %zmm2, %zmm0 {%k1}
+; ALL-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; ALL-NEXT: vmovdqa64 %zmm2, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%mask.cast = bitcast i16 %mask to <16 x i1>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 485c197d514..365ff3bf63d 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2463,14 +2463,12 @@ define <8 x i64> @shuffle_v8i64_01230123(<8 x i64> %a, <8 x i64> %b) {
define <8 x double> @shuffle_v8f64_89234567(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_89234567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [8,9,2,3,4,5,6,7]
-; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_89234567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [8,0,9,0,2,0,3,0,4,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -2479,14 +2477,12 @@ define <8 x double> @shuffle_v8f64_89234567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01894567(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_01894567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [0,1,8,9,4,5,6,7]
-; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_01894567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,1,0,8,0,9,0,4,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -2495,14 +2491,12 @@ define <8 x double> @shuffle_v8f64_01894567(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01238967(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_01238967:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [0,1,2,3,8,9,6,7]
-; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_01238967:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,1,0,2,0,3,0,8,0,9,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
ret <8 x double> %shuffle
@@ -2511,14 +2505,12 @@ define <8 x double> @shuffle_v8f64_01238967(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_01234589(<8 x double> %a, <8 x double> %b) {
; AVX512F-LABEL: shuffle_v8f64_01234589:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovapd {{.*#+}} zmm2 = [0,1,2,3,4,5,8,9]
-; AVX512F-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8f64_01234589:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovapd {{.*#+}} zmm2 = [0,0,1,0,2,0,3,0,4,0,5,0,8,0,9,0]
-; AVX512F-32-NEXT: vpermt2pd %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
ret <8 x double> %shuffle
@@ -2527,14 +2519,12 @@ define <8 x double> @shuffle_v8f64_01234589(<8 x double> %a, <8 x double> %b) {
define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_89234567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,9,2,3,4,5,6,7]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_89234567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [8,0,9,0,2,0,3,0,4,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -2543,14 +2533,12 @@ define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01894567:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,8,9,4,5,6,7]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_01894567:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,1,0,8,0,9,0,4,0,5,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -2559,14 +2547,12 @@ define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01238967:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,8,9,6,7]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_01238967:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,1,0,2,0,3,0,8,0,9,0,6,0,7,0]
-; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
ret <8 x i64> %shuffle
@@ -2575,14 +2561,12 @@ define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01234589:
; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,3,4,5,8,9]
-; AVX512F-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512F-32-LABEL: shuffle_v8i64_01234589:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,1,0,2,0,3,0,4,0,5,0,8,0,9,0]
-; AVX512F-32-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; AVX512F-32-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
ret <8 x i64> %shuffle