diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll | 3 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll | 38 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll | 72 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 8 |
8 files changed, 158 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 866d11f62dc..322a0133b29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8291,10 +8291,28 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, DAG.getMachineFunction().getMachineMemOperand( Ld->getMemOperand(), Offset, SVT.getStoreSize())); - } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { - // We can't broadcast from a vector register without AVX2, and we can only - // broadcast from the zero-element of a vector register. + } else if (!Subtarget->hasAVX2()) { + // We can't broadcast from a vector register without AVX2. return SDValue(); + } else if (BroadcastIdx != 0) { + // We can only broadcast from the zero-element of a vector register, + // but it can be advantageous to broadcast from the zero-element of a + // subvector. + if (!VT.is256BitVector() && !VT.is512BitVector()) + return SDValue(); + + // VPERMQ/VPERMPD can perform the cross-lane shuffle directly. + if (VT == MVT::v4f64 || VT == MVT::v4i64) + return SDValue(); + + // Only broadcast the zero-element of a 128-bit subvector. + unsigned EltSize = VT.getScalarSizeInBits(); + if (((BroadcastIdx * EltSize) % 128) != 0) + return SDValue(); + + MVT ExtVT = MVT::getVectorVT(VT.getScalarType(), 128 / EltSize); + V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtVT, V, + DAG.getIntPtrConstant(BroadcastIdx, DL)); } V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll index 7e3dc6e294f..3504734e48d 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v16.ll @@ -2904,8 +2904,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_2 ; ; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_01_uu_05_07_25_uu_uu_uu_09_uu_13_15_25: ; AVX2: # BB#0: -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2 -; AVX2-NEXT: vpermd %ymm1, %ymm2, %ymm1 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1 ; AVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,1,2,1,4,5,6,7,8,9,10,9,12,13,14,15] ; AVX2-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,7,7,8,9,10,11,12,13,15,15] ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15] @@ -3293,8 +3293,7 @@ define <16 x i16> @shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8(<16 x i16> %a, ; AVX2-LABEL: shuffle_v16i16_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8_8: ; AVX2: # BB#0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> ret <16 x i16> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll index 161a21cef03..4a4e4a9f1fb 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll @@ -2006,8 +2006,7 @@ define <32 x i8> @shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_ ; AVX2-LABEL: shuffle_v32i8_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16_16: ; AVX2: # BB#0: ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> ret <32 x i8> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll index a4b313f9e05..485b79c5cfc 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -851,8 +851,8 @@ define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) { ; ; AVX2-LABEL: shuffle_v8f32_44444444: ; AVX2: # BB#0: -; AVX2-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 -; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> ret <8 x float> %shuffle @@ -2015,8 +2015,8 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { ; ; AVX2-LABEL: shuffle_v8i32_44444444: ; AVX2: # BB#0: -; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1 -; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0 +; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 ; AVX2-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> ret <8 x i32> %shuffle diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index bef54b05041..ff3ece13a47 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -4,6 +4,25 @@ target triple = "x86_64-unknown-unknown" +define <16 x float> @shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x float> %a, <16 x float> %b) { +; ALL-LABEL: shuffle_v16f32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; ALL: # BB#0: +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 +; ALL-NEXT: retq + %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <16 x float> %shuffle +} + +define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<16 x float> %a, <16 x float> %b) { +; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08: +; ALL: # BB#0: +; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 +; ALL-NEXT: retq + %shuffle = shufflevector <16 x float> %a, <16 x float> %b, <16 x i32><i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <16 x float> %shuffle +} + define <16 x float> @shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d(<16 x float> %a, <16 x float> %b) { ; ALL-LABEL: shuffle_v16f32_00_10_01_11_04_14_05_15_08_18_09_19_0c_1c_0d_1d: ; ALL: # BB#0: @@ -70,6 +89,25 @@ define <16 x float> @shuffle_v16f32_zz_12_zz_13_zz_16_zz_17_zz_1a_zz_1b_zz_1e_zz ret <16 x float> %shuffle } +define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i32> %a, <16 x i32> %b) { +; ALL-LABEL: shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; ALL: # BB#0: +; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: retq + %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <16 x i32> %shuffle +} + +define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) { +; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: +; ALL: # BB#0: +; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0 +; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: retq + %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <16 x i32> %shuffle +} + define <16 x i32> @shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_02_12_03_13_06_16_07_17_0a_1a_0b_1b_0e_1e_0f_1f: ; ALL: # BB#0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll index ab809beb4b4..a3cdaf09b64 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -3,6 +3,25 @@ target triple = "x86_64-unknown-unknown" +define <32 x i16> @shuffle_v32i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<32 x i16> %a) { +; ALL-LABEL: shuffle_v32i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00: +; ALL: # BB#0: +; ALL-NEXT: vpbroadcastw %xmm0, %zmm0 +; ALL-NEXT: retq + %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> zeroinitializer + ret <32 x i16> %c +} + +define <32 x i16> @shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08(<32 x i16> %a) { +; ALL-LABEL: shuffle_v32i16_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08: +; ALL: # BB#0: +; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0 +; ALL-NEXT: vpbroadcastw %xmm0, %zmm0 +; ALL-NEXT: retq + %c = shufflevector <32 x i16> %a, <32 x i16> undef, <32 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + ret <32 x i16> %c +} + define <32 x i16> @shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f(<32 x i16> %a) { ; ALL-LABEL: shuffle_v32i16_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_01_02_05_u_u_07_u_0a_01_00_05_u_04_07_u_0a_1f: ; ALL: # BB#0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll index 631968f6afa..28a720f8583 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -18,6 +18,38 @@ define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) { ret <8 x double> %shuffle } +define <8 x double> @shuffle_v8f64_22222222(<8 x double> %a, <8 x double> %b) { +; AVX512F-LABEL: shuffle_v8f64_22222222: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v8f64_22222222: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextractf32x4 $1, %zmm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> + ret <8 x double> %shuffle +} + +define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) { +; AVX512F-LABEL: shuffle_v8f64_44444444: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v8f64_44444444: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <8 x double> %shuffle +} + define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) { ; AVX512F-LABEL: shuffle_v8f64_00000010: ; AVX512F: # BB#0: @@ -994,6 +1026,38 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) { ret <8 x i64> %shuffle } +define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) { +; AVX512F-LABEL: shuffle_v8i64_44444444: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v8i64_44444444: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> + ret <8 x i64> %shuffle +} + +define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) { +; AVX512F-LABEL: shuffle_v8i64_66666666: +; AVX512F: # BB#0: +; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: retq +; +; AVX512F-32-LABEL: shuffle_v8i64_66666666: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: retl + %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6> + ret <8 x i64> %shuffle +} + define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) { ; ; AVX512F-LABEL: shuffle_v8i64_00000010: @@ -2102,7 +2166,7 @@ define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1 ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2 -; AVX512F-32-NEXT: vpsllvq .LCPI122_0, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpsllvq .LCPI126_0, %zmm2, %zmm2 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-32-NEXT: retl @@ -2123,7 +2187,7 @@ define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> ; AVX512F-32-LABEL: test_vshufi64x2_512_mask: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpmovsxwq %xmm2, %zmm2 -; AVX512F-32-NEXT: vpsllvq .LCPI123_0, %zmm2, %zmm2 +; AVX512F-32-NEXT: vpsllvq .LCPI127_0, %zmm2, %zmm2 ; AVX512F-32-NEXT: vptestmq %zmm2, %zmm2, %k1 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1] ; AVX512F-32-NEXT: retl @@ -2160,7 +2224,7 @@ define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1 -; AVX512F-32-NEXT: vpsllvq .LCPI125_0, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpsllvq .LCPI129_0, %zmm1, %zmm1 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] @@ -2183,7 +2247,7 @@ define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: vpmovsxwq %xmm1, %zmm1 -; AVX512F-32-NEXT: vpsllvq .LCPI126_0, %zmm1, %zmm1 +; AVX512F-32-NEXT: vpsllvq .LCPI130_0, %zmm1, %zmm1 ; AVX512F-32-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %eax ; AVX512F-32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1] diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index b2727e2368a..1ee097b076f 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -165,8 +165,8 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) { ; AVX512F-NEXT: kmovw %edi, %k1 ; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 ; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm1 {%k1} {z} -; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2 -; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1 +; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm1 +; AVX512F-NEXT: vpbroadcastq %xmm1, %zmm1 ; AVX512F-NEXT: vpsllq $63, %zmm1, %zmm1 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k1 ; AVX512F-NEXT: vmovdqu64 %zmm0, %zmm0 {%k1} {z} @@ -177,8 +177,8 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u(i8 %a) { ; VL_BW_DQ: # BB#0: ; VL_BW_DQ-NEXT: kmovb %edi, %k0 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0 -; VL_BW_DQ-NEXT: vpbroadcastq {{.*}}(%rip), %zmm1 -; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0 +; VL_BW_DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm0 +; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0 ; VL_BW_DQ-NEXT: vpsllq $63, %zmm0, %zmm0 ; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0 ; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0 |