author | Craig Topper <craig.topper@intel.com> | 2017-09-18 04:40:58 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2017-09-18 04:40:58 +0000 |
commit | a6054328e8b31217bec6e6d5dcbb28ca304d48d1 (patch) | |
tree | 5d1cb5f095fe831f3345ebbd1ce4fb5d72bd64c0 /llvm/test/CodeGen | |
parent | 87f7381edf47c104f3067827cd93df5d0009e103 (diff) | |
download | bcm5719-llvm-a6054328e8b31217bec6e6d5dcbb28ca304d48d1.tar.gz bcm5719-llvm-a6054328e8b31217bec6e6d5dcbb28ca304d48d1.zip |
[X86] Teach the execution domain fixing tables to use movlhps in place of unpcklpd for the packed single domain.
MOVLHPS has a smaller encoding than UNPCKLPD in the legacy SSE encodings (it does not need the 0x66 operand-size prefix). With the VEX and EVEX encodings the sizes are the same, so it doesn't matter there.
llvm-svn: 313509
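To illustrate the effect on generated code, the snippet below reproduces one of the affected tests from llvm/test/CodeGen/X86/sse2.ll (test11 in the diff that follows). Building a <2 x double> from two scalar doubles selects a low-quadword concatenation; with the legacy SSE encodings this now prints as movlhps instead of unpcklpd. The file's RUN lines are omitted from this excerpt, so treat this as a sketch of the affected pattern rather than the full test.

```llvm
; Reproduced from the sse2.ll hunk below. Under the file's SSE2 configuration,
; the X64 CHECK line now expects
;   movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]   (i.e. movlhps %xmm1, %xmm0)
; where it previously expected the equivalent unpcklpd. The MOVLHPS form skips
; the 0x66 prefix, so the legacy encoding is one byte shorter.
define <2 x double> @test11(double %a, double %b) nounwind {
  %tmp  = insertelement <2 x double> undef, double %a, i32 0
  %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1
  ret <2 x double> %tmp7
}
```

The remaining hunks in this commit are the same mechanical unpcklpd-to-movlhps substitution repeated across the other 40 test files in the diffstat.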
Diffstat (limited to 'llvm/test/CodeGen')
41 files changed, 360 insertions, 360 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index fe3501823f9..23bf14d7fe3 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -2246,15 +2246,15 @@ define <4 x double> @test_mm256_set_pd(double %a0, double %a1, double %a2, doubl ; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; X32-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_set_pd: ; X64: # BB#0: -; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0] +; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X64-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X64-NEXT: retq %res0 = insertelement <4 x double> undef, double %a3, i32 0 @@ -2883,15 +2883,15 @@ define <4 x double> @test_mm256_setr_pd(double %a0, double %a1, double %a2, doub ; X32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; X32-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X32-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm3[0],xmm2[0] +; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X32-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm2[0] ; X32-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_setr_pd: ; X64: # BB#0: -; X64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; X64-NEXT: retq %res0 = insertelement <4 x double> undef, double %a0, i32 0 diff --git a/llvm/test/CodeGen/X86/avx-unpack.ll b/llvm/test/CodeGen/X86/avx-unpack.ll index 88d1de4f00d..801a0ceac0d 100644 --- a/llvm/test/CodeGen/X86/avx-unpack.ll +++ b/llvm/test/CodeGen/X86/avx-unpack.ll @@ -52,7 +52,7 @@ define <4 x double> @unpacklopd_not(<4 x double> %src1, <4 x double> %src2) noun ; CHECK-LABEL: unpacklopd_not: ; CHECK: # BB#0: ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; CHECK-NEXT: retq %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> @@ -76,7 +76,7 @@ define <4 x double> @unpackhipd_not(<4 x double> %src1, <4 x double> %src2) noun ; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm1 ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; CHECK-NEXT: retq %shuffle.i = shufflevector <4 x double> %src1, <4 x double> %src2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index a89aa597dce..1aa01597375 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ 
-25,25 +25,25 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -64,12 +64,12 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; NODQ-NEXT: retq ; @@ -95,7 +95,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; NODQ-NEXT: retq ; ; VLDQ-LABEL: slto2f64: @@ -425,25 +425,25 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm0 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: 
vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -466,11 +466,11 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; KNL-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2 ; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; KNL-NEXT: vpextrq $1, %xmm4, %rax -; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 ; KNL-NEXT: vmovq %xmm4, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0] ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax @@ -478,11 +478,11 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm0, %rax -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm0, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax @@ -490,25 +490,25 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vpextrq $1, %xmm1, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm1, %rax ; KNL-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; KNL-NEXT: retq @@ -526,25 +526,25 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3 ; VLNODQ-NEXT: vmovq %xmm2, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; 
VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -552,25 +552,25 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 ; VLNODQ-NEXT: vmovq %xmm2, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm1, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; VLNODQ-NEXT: retq @@ -582,25 +582,25 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm3 ; AVX512BW-NEXT: vmovq %xmm2, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm2 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm4, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm0, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm0 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -608,25 +608,25 @@ 
define <16 x double> @ulto16f64(<16 x i64> %a) { ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 ; AVX512BW-NEXT: vmovq %xmm2, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm2 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm1, %rax ; AVX512BW-NEXT: vcvtusi2sdq %rax, %xmm5, %xmm1 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: retq @@ -1354,25 +1354,25 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; NODQ-NEXT: vmovq %xmm1, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2 ; NODQ-NEXT: vpextrq $1, %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm2, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; NODQ-NEXT: vpextrq $1, %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm3 ; NODQ-NEXT: vmovq %xmm0, %rax ; NODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm0 -; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; NODQ-NEXT: retq @@ -1395,11 +1395,11 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; KNL-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 ; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; KNL-NEXT: vpextrq $1, %xmm4, %rax -; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 ; KNL-NEXT: vmovq %xmm4, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm4[0],xmm3[0] ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm0, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax @@ -1407,11 +1407,11 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; KNL-NEXT: vmovq %xmm3, %rax ; 
KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm0, %rax -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm0, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax @@ -1419,25 +1419,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0] ; KNL-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm1, %xmm3 ; KNL-NEXT: vpextrq $1, %xmm3, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm3, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; KNL-NEXT: vpextrq $1, %xmm1, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; KNL-NEXT: vmovq %xmm1, %rax ; KNL-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1 -; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; KNL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; KNL-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; KNL-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; KNL-NEXT: retq @@ -1455,25 +1455,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 ; VLNODQ-NEXT: vmovq %xmm2, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; VLNODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; VLNODQ-NEXT: vextracti128 $1, %ymm0, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; VLNODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -1481,25 +1481,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 ; VLNODQ-NEXT: vmovq %xmm2, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; 
VLNODQ-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; VLNODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; VLNODQ-NEXT: vextracti128 $1, %ymm1, %xmm3 ; VLNODQ-NEXT: vpextrq $1, %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm3, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; VLNODQ-NEXT: vpextrq $1, %xmm1, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; VLNODQ-NEXT: vmovq %xmm1, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; VLNODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; VLNODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; VLNODQ-NEXT: retq @@ -1511,25 +1511,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm3 ; AVX512BW-NEXT: vmovq %xmm2, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm2 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm4, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm0, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm0 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm2 @@ -1537,25 +1537,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 ; AVX512BW-NEXT: vmovq %xmm2, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm2 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 ; AVX512BW-NEXT: vextracti128 $1, %ymm1, %xmm3 ; AVX512BW-NEXT: vpextrq $1, %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm3, %rax ; AVX512BW-NEXT: vcvtsi2sdq 
%rax, %xmm5, %xmm3 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX512BW-NEXT: vpextrq $1, %xmm1, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm4 ; AVX512BW-NEXT: vmovq %xmm1, %rax ; AVX512BW-NEXT: vcvtsi2sdq %rax, %xmm5, %xmm1 -; AVX512BW-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; AVX512BW-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; AVX512BW-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX512BW-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: retq @@ -2241,7 +2241,7 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VLNODQ-NEXT: retq %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> @@ -2593,7 +2593,7 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm1 ; VLNODQ-NEXT: vmovq %xmm0, %rax ; VLNODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; VLNODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VLNODQ-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VLNODQ-NEXT: retq %mask = icmp ult <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x double> diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index f2f36c414c3..2bfc8b63816 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -882,7 +882,7 @@ define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) { define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) { ; CHECK-LABEL: test_insert_128_v8f64: ; CHECK: ## BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm1 = xmm0[0],xmm1[0] ; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %r = insertelement <8 x double> %x, double %y, i32 1 diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 3c078ca6589..98dbe9486ac 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -1906,7 +1906,7 @@ define <2 x i64> @test_4xi64_to_2xi64_perm_mask0(<4 x i64> %vec) { ; CHECK-LABEL: test_4xi64_to_2xi64_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <2 x i32> <i32 2, i32 0> @@ -2313,7 +2313,7 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec) { ; CHECK: # BB#0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0> @@ -2684,7 +2684,7 @@ define <2 x i64> @test_8xi64_to_2xi64_perm_mem_mask0(<8 x i64>* %vp) { ; CHECK-NEXT: vmovaps (%rdi), %zmm0 ; CHECK-NEXT: vextractf32x4 $2, %zmm0, %xmm1 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = 
xmm1[0],xmm0[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %vec = load <8 x i64>, <8 x i64>* %vp @@ -3703,7 +3703,7 @@ define <2 x double> @test_4xdouble_to_2xdouble_perm_mask0(<4 x double> %vec) { ; CHECK-LABEL: test_4xdouble_to_2xdouble_perm_mask0: ; CHECK: # BB#0: ; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1 -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %res = shufflevector <4 x double> %vec, <4 x double> undef, <2 x i32> <i32 2, i32 0> diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll index f11cd20896b..945a3f3b69b 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/unpack.ll @@ -718,7 +718,7 @@ define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) { ; CHECK-LABEL: test_2xdouble_unpack_low_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2> ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll index 531c6de5f90..384c0828dc6 100644 --- a/llvm/test/CodeGen/X86/build-vector-128.ll +++ b/llvm/test/CodeGen/X86/build-vector-128.ll @@ -16,7 +16,7 @@ define <2 x double> @test_buildvector_v2f64(double %a0, double %a1) { ; ; SSE-64-LABEL: test_buildvector_v2f64: ; SSE-64: # BB#0: -; SSE-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-64-NEXT: retq ; ; AVX-32-LABEL: test_buildvector_v2f64: @@ -26,7 +26,7 @@ define <2 x double> @test_buildvector_v2f64(double %a0, double %a1) { ; ; AVX-64-LABEL: test_buildvector_v2f64: ; AVX-64: # BB#0: -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-64-NEXT: retq %ins0 = insertelement <2 x double> undef, double %a0, i32 0 %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 @@ -43,7 +43,7 @@ define <4 x float> @test_buildvector_v4f32(float %a0, float %a1, float %a2, floa ; SSE2-64: # BB#0: ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-64-NEXT: retq ; ; SSE41-64-LABEL: test_buildvector_v4f32: diff --git a/llvm/test/CodeGen/X86/build-vector-256.ll b/llvm/test/CodeGen/X86/build-vector-256.ll index 942b7779abe..4b077cc2469 100644 --- a/llvm/test/CodeGen/X86/build-vector-256.ll +++ b/llvm/test/CodeGen/X86/build-vector-256.ll @@ -12,8 +12,8 @@ define <4 x double> @test_buildvector_v4f64(double %a0, double %a1, double %a2, ; ; AVX-64-LABEL: test_buildvector_v4f64: ; AVX-64: # BB#0: -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX-64-NEXT: retq %ins0 = insertelement <4 x double> undef, double %a0, i32 0 diff --git 
a/llvm/test/CodeGen/X86/build-vector-512.ll b/llvm/test/CodeGen/X86/build-vector-512.ll index fbfbf2d53c6..ca83da93eb7 100644 --- a/llvm/test/CodeGen/X86/build-vector-512.ll +++ b/llvm/test/CodeGen/X86/build-vector-512.ll @@ -12,11 +12,11 @@ define <8 x double> @test_buildvector_v8f64(double %a0, double %a1, double %a2, ; ; AVX-64-LABEL: test_buildvector_v8f64: ; AVX-64: # BB#0: -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0] ; AVX-64-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX-64-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX-64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll index cd5abc1373b..3d7a221d50a 100644 --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -43,7 +43,7 @@ define <4 x float> @test_negative_zero_1(<4 x float> %A) { ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_negative_zero_1: @@ -77,7 +77,7 @@ define <4 x float> @test_buildvector_v4f32_register(float %f0, float %f1, float ; SSE2: # BB#0: ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_register: @@ -102,7 +102,7 @@ define <4 x float> @test_buildvector_v4f32_load(float* %p0, float* %p1, float* % ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_load: @@ -129,7 +129,7 @@ define <4 x float> @test_buildvector_v4f32_partial_load(float %f0, float %f1, fl ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: test_buildvector_v4f32_partial_load: diff --git a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll index b92161d83f3..6abb3019d7a 100644 --- a/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll +++ b/llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll @@ -855,7 +855,7 @@ define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind { ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: unpcklpd 
{{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: popq %rbx ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq @@ -1033,7 +1033,7 @@ define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { ; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: popq %rbx ; SSE-NEXT: popq %r14 ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll index c75fde45380..37c753474e6 100644 --- a/llvm/test/CodeGen/X86/combine-fcopysign.ll +++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll @@ -245,7 +245,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float ; SSE-NEXT: cvtss2sd %xmm5, %xmm4 ; SSE-NEXT: andps %xmm8, %xmm4 ; SSE-NEXT: orps %xmm0, %xmm4 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: andps %xmm7, %xmm0 @@ -257,7 +257,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float ; SSE-NEXT: cvtss2sd %xmm6, %xmm0 ; SSE-NEXT: andps %xmm8, %xmm0 ; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index 6bf22991afe..b99c05288b3 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -182,7 +182,7 @@ define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test14: ; CHECK: # BB#0: -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2> %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> @@ -207,7 +207,7 @@ define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) { define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test16: ; CHECK: # BB#0: -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0> diff --git a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll index 5169e2039b0..30b4040582a 100644 --- a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll +++ b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll @@ -8,7 +8,7 @@ define void @test(<2 x double>* %dst, <4 x double> %src) nounwind { ; CHECK-LABEL: test: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: movaps %xmm0, (%eax) ; CHECK-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/CodeGen/X86/haddsub-2.ll index fd023d01803..1baa5f28d48 100644 --- a/llvm/test/CodeGen/X86/haddsub-2.ll +++ b/llvm/test/CodeGen/X86/haddsub-2.ll @@ -923,7 +923,7 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: subss %xmm4, 
%xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: not_a_hsub_2: diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll index 8a9bf6458c3..e59ff79e0d8 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/CodeGen/X86/haddsub-undef.ll @@ -171,7 +171,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index b7c43d3b2e3..18083dfe6b3 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -402,7 +402,7 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-LIBCALL-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; CHECK-LIBCALL-NEXT: unpcklps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload ; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] -; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-LIBCALL-NEXT: addq $48, %rsp ; CHECK-LIBCALL-NEXT: popq %rbx ; CHECK-LIBCALL-NEXT: retq @@ -457,7 +457,7 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; CHECK-I686-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-I686-NEXT: addl $56, %esp ; CHECK-I686-NEXT: popl %esi ; CHECK-I686-NEXT: retl @@ -487,14 +487,14 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload ; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 -; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2 ; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload ; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero ; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1 -; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-LIBCALL-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; CHECK-LIBCALL-NEXT: addq $16, %rsp ; CHECK-LIBCALL-NEXT: popq %rbx ; CHECK-LIBCALL-NEXT: retq @@ -515,10 +515,10 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { ; BWON-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 ; BWON-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; BWON-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; BWON-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; BWON-F16C-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; BWON-F16C-NEXT: vmovlhps {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] ; BWON-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; BWON-F16C-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/horizontal-shuffle.ll b/llvm/test/CodeGen/X86/horizontal-shuffle.ll index def614150cd..d731ab4ba20 100644 --- a/llvm/test/CodeGen/X86/horizontal-shuffle.ll +++ b/llvm/test/CodeGen/X86/horizontal-shuffle.ll @@ -11,14 +11,14 @@ define <4 x float> @test_unpackl_fhadd_128(<4 x float> %a0, <4 x float> %a1, <4 ; X32: ## BB#0: ; X32-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; X32-NEXT: vhaddps %xmm3, %xmm2, %xmm1 -; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_unpackl_fhadd_128: ; X64: ## BB#0: ; X64-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ; X64-NEXT: vhaddps %xmm3, %xmm2, %xmm1 -; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) %2 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a2, <4 x float> %a3) diff --git a/llvm/test/CodeGen/X86/i64-to-float.ll b/llvm/test/CodeGen/X86/i64-to-float.ll index 21a05449f06..26553f5f352 100644 --- a/llvm/test/CodeGen/X86/i64-to-float.ll +++ b/llvm/test/CodeGen/X86/i64-to-float.ll @@ -258,7 +258,7 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind { ; X64-SSE-NEXT: movq %xmm1, %rax ; X64-SSE-NEXT: xorps %xmm1, %xmm1 ; X64-SSE-NEXT: cvtsi2sdq %rax, %xmm1 -; X64-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: clamp_sitofp_2i64_2f64: @@ -273,7 +273,7 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind { ; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 ; X64-AVX-NEXT: vmovq %xmm0, %rax ; X64-AVX-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; X64-AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-AVX-NEXT: retq %clo = icmp slt <2 x i64> %a, <i64 -255, i64 -255> %lo = select <2 x i1> %clo, <2 x i64> <i64 -255, i64 -255>, <2 x i64> %a diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll index 5529f66546b..3e257f5fd85 100644 --- a/llvm/test/CodeGen/X86/masked_memop.ll +++ b/llvm/test/CodeGen/X86/masked_memop.ll @@ -1126,7 +1126,7 @@ define <8 x double> @load_one_mask_bit_set5(<8 x double>* %addr, <8 x double> %v ; AVX: ## BB#0: ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll index 65318b42f76..9a3d8b5aede 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll @@ -270,7 +270,7 @@ define <4 x float> @merge_4f32_f32_012u(float* %ptr) nounwind uwtable noinline s ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_012u: @@ -319,7 +319,7 @@ define <4 x float> @merge_4f32_f32_019u(float* %ptr) nounwind uwtable noinline s ; SSE2: # BB#0: ; SSE2-NEXT: movss {{.*#+}} 
xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_019u: @@ -904,14 +904,14 @@ define <2 x i64> @merge_2i64_i64_12_volatile(i64* %ptr) nounwind uwtable noinlin ; SSE: # BB#0: ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: merge_2i64_i64_12_volatile: ; AVX: # BB#0: ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq ; ; X32-SSE1-LABEL: merge_2i64_i64_12_volatile: @@ -964,7 +964,7 @@ define <4 x float> @merge_4f32_f32_2345_volatile(float* %ptr) nounwind uwtable n ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; ; SSE41-LABEL: merge_4f32_f32_2345_volatile: diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 03fb19258ad..e4ead498782 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -1444,49 +1444,49 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; GENERIC-LABEL: test_movlhps: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlhps: ; ATOM: # BB#0: -; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_movlhps: ; SLM: # BB#0: -; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movlhps: ; SANDY: # BB#0: -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_movlhps: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: retq # sched: [2:1.00] ; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # BB#0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = 
xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movlhps: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] ; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5> diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index 836bc885b8c..c139797e902 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -2262,7 +2262,7 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi32: @@ -2294,7 +2294,7 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_epi64x: @@ -2313,12 +2313,12 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { ; X32: # BB#0: ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_set_pd: ; X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq %res0 = insertelement <2 x double> undef, double %a1, i32 0 @@ -2671,7 +2671,7 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi32: @@ -2703,7 +2703,7 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_epi64x: @@ -2722,12 +2722,12 @@ define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { ; X32: # BB#0: ; X32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_setr_pd: ; 
X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 @@ -3839,12 +3839,12 @@ define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) { define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_unpacklo_epi64: ; X32: # BB#0: -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_unpacklo_epi64: ; X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> ret <2 x i64> %res @@ -3853,12 +3853,12 @@ define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) { define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) { ; X32-LABEL: test_mm_unpacklo_pd: ; X32: # BB#0: -; X32-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X32-NEXT: retl ; ; X64-LABEL: test_mm_unpacklo_pd: ; X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> ret <2 x double> %res diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 736a622d9d4..9e46219f34a 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -2863,13 +2863,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; GENERIC-LABEL: test_movsd_reg: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] ; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movsd_reg: ; ATOM: # BB#0: -; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] ; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] ; ATOM-NEXT: nop # sched: [1:0.50] @@ -2879,33 +2879,33 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; ; SLM-LABEL: test_movsd_reg: ; SLM: # BB#0: -; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-LABEL: test_movsd_reg: ; SANDY: # BB#0: -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # BB#0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [2:1.00] ; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # BB#0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # 
sched: [2:1.00] ; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # BB#0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] +; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movsd_reg: ; ZNVER1: # BB#0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] +; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0> ret <2 x double> %1 diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll index 72af5db9838..b7e780b512c 100644 --- a/llvm/test/CodeGen/X86/sse2.ll +++ b/llvm/test/CodeGen/X86/sse2.ll @@ -40,7 +40,7 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { ; X64-LABEL: test2: ; X64: # BB#0: ; X64-NEXT: movaps (%rsi), %xmm1 -; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movaps %xmm1, (%rdi) ; X64-NEXT: retq %tmp3 = load <2 x double>, <2 x double>* %A, align 16 @@ -206,7 +206,7 @@ define <4 x float> @test9(i32 %dummy, float %a, float %b, float %c, float %d) no ; X64: # BB#0: ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] @@ -225,7 +225,7 @@ define <4 x float> @test10(float %a, float %b, float %c, float %d) nounwind { ; X64: # BB#0: ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %tmp = insertelement <4 x float> undef, float %a, i32 0 ; <<4 x float>> [#uses=1] %tmp11 = insertelement <4 x float> %tmp, float %b, i32 1 ; <<4 x float>> [#uses=1] @@ -242,7 +242,7 @@ define <2 x double> @test11(double %a, double %b) nounwind { ; ; X64-LABEL: test11: ; X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; X64-NEXT: retq %tmp = insertelement <2 x double> undef, double %a, i32 0 ; <<2 x double>> [#uses=1] %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1] @@ -315,7 +315,7 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { ; X86-NEXT: movaps %xmm2, %xmm0 ; X86-NEXT: addps %xmm1, %xmm0 ; X86-NEXT: subps %xmm1, %xmm2 -; X86-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X86-NEXT: retl ; ; X64-LABEL: test14: @@ -325,7 +325,7 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { ; X64-NEXT: movaps %xmm2, %xmm0 ; X64-NEXT: addps %xmm1, %xmm0 ; X64-NEXT: subps %xmm1, %xmm2 -; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; X64-NEXT: retq %tmp = load <4 x float>, <4 x float>* %y ; <<4 x float>> [#uses=2] %tmp5 = load <4 x float>, <4 x float>* %x ; <<4 x float>> [#uses=2] diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll index 38d535589aa..f80ee38fa96 
100644 --- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -342,7 +342,7 @@ define <4 x float> @test14(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; @@ -417,7 +417,7 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: addss %xmm0, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 3ed93a958e2..9f30767b10d 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -935,14 +935,14 @@ define <4 x float> @insertps_with_undefs(<4 x float> %a, float* %b) { ; X32: ## BB#0: ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X32-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X32-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X32-NEXT: movaps %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: insertps_with_undefs: ; X64: ## BB#0: ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq %1 = load float, float* %b, align 4 diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index b307923766f..c6335d751ed 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -2375,7 +2375,7 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { ; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3] ; SSE-NEXT: retq @@ -2388,7 +2388,7 @@ define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { ; AVX-NEXT: fisttpll -{{[0-9]+}}(%rsp) ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; AVX-NEXT: retq %cvt = fptosi <2 x x86_fp80> %a to <2 x i32> diff --git a/llvm/test/CodeGen/X86/vec_insert-2.ll b/llvm/test/CodeGen/X86/vec_insert-2.ll index bb0951b8eaf..eedb1d252ba 100644 --- a/llvm/test/CodeGen/X86/vec_insert-2.ll +++ b/llvm/test/CodeGen/X86/vec_insert-2.ll @@ -46,7 +46,7 @@ define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind { ; ; X64-LABEL: t3: ; X64: # BB#0: -; X64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1 diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index add7a0d7dd0..9bf1f8a2aa6 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ 
b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -25,7 +25,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -35,7 +35,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_2i64_to_2f64: @@ -44,7 +44,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_2i64_to_2f64: @@ -53,7 +53,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_2i64_to_2f64: @@ -223,14 +223,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0] ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: cvtsi2sdq %rax, %xmm3 ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] ; SSE-NEXT: movq %xmm0, %rax ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2sdq %rax, %xmm0 -; SSE-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: movaps %xmm3, %xmm1 ; SSE-NEXT: retq @@ -242,12 +242,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -258,12 +258,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX2-NEXT: vmovq %xmm1, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -274,12 +274,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512F-NEXT: vcvtsi2sdq 
%rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -290,12 +290,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -478,7 +478,7 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_2i64_to_2f64: @@ -487,7 +487,7 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_2i64_to_2f64: @@ -796,12 +796,12 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -812,12 +812,12 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -1362,7 +1362,7 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq 
%rax, %xmm0 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -2189,7 +2189,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-NEXT: addss %xmm0, %xmm0 ; SSE-NEXT: .LBB47_12: ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -2577,7 +2577,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1 -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; VEX-LABEL: sitofp_load_2i64_to_2f64: @@ -2587,7 +2587,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; VEX-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; VEX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_load_2i64_to_2f64: @@ -2597,7 +2597,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64: @@ -2607,7 +2607,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: @@ -2719,7 +2719,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1 -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2sdq %rax, %xmm1 @@ -2727,7 +2727,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; SSE-NEXT: movq %xmm2, %rax ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2sdq %rax, %xmm2 -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_load_4i64_to_4f64: @@ -2738,12 +2738,12 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX1-NEXT: vmovq %xmm1, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -2755,12 +2755,12 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 
x i64> *%a) { ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX2-NEXT: vmovq %xmm1, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -2772,12 +2772,12 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2789,12 +2789,12 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2sdq %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -2917,7 +2917,7 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_load_2i64_to_2f64: @@ -2927,7 +2927,7 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64: @@ -3138,12 +3138,12 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; AVX512F-NEXT: vmovq %xmm1, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512F-NEXT: vpextrq $1, %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -3155,12 +3155,12 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) { ; 
AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2 ; AVX512VL-NEXT: vmovq %xmm1, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm1 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm2 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2sdq %rax, %xmm3, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -3323,7 +3323,7 @@ define <4 x float> @sitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_load_4i64_to_4f32: @@ -3491,7 +3491,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0] ; SSE-NEXT: movq %xmm3, %rax ; SSE-NEXT: xorps %xmm4, %xmm4 ; SSE-NEXT: cvtsi2ssq %rax, %xmm4 @@ -3508,7 +3508,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: sitofp_load_8i64_to_8f32: @@ -3816,7 +3816,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-NEXT: addss %xmm2, %xmm2 ; SSE-NEXT: .LBB76_12: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: uitofp_load_4i64_to_4f32: @@ -4227,7 +4227,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 ; SSE-NEXT: .LBB80_21: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm7[0],xmm5[1],xmm7[1] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE-NEXT: movq %xmm2, %rax @@ -4247,7 +4247,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: addss %xmm2, %xmm2 ; SSE-NEXT: .LBB80_24: ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm5[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0] ; SSE-NEXT: retq ; ; AVX1-LABEL: uitofp_load_8i64_to_8f32: diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll index 2f5c55d74fb..44d4ef77486 100644 --- a/llvm/test/CodeGen/X86/vector-half-conversions.ll +++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll @@ -1570,7 +1570,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 
; AVX1-NEXT: retq ; ; AVX2-LABEL: cvt_2i16_to_2f64: @@ -1587,7 +1587,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; ; AVX512F-LABEL: cvt_2i16_to_2f64: @@ -1604,7 +1604,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1621,7 +1621,7 @@ define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %1 = bitcast <2 x i16> %a0 to <2 x half> %2 = fpext <2 x half> %1 to <2 x double> @@ -1652,10 +1652,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -1682,10 +1682,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -1712,10 +1712,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -1742,10 +1742,10 @@ define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; 
AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %1 = bitcast <4 x i16> %a0 to <4 x half> @@ -1766,7 +1766,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: cvt_8i16_to_2f64: @@ -1781,7 +1781,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; ; AVX512F-LABEL: cvt_8i16_to_2f64: @@ -1796,7 +1796,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1814,7 +1814,7 @@ define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> %2 = bitcast <2 x i16> %1 to <2 x half> @@ -1845,10 +1845,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -1874,10 +1874,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -1903,10 +1903,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; 
AVX512F-NEXT: retq ; @@ -1934,10 +1934,10 @@ define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> @@ -1987,17 +1987,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 ; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; @@ -2041,17 +2041,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm5[0],xmm0[0] ; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 ; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX2-NEXT: retq ; @@ -2095,17 +2095,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 ; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; 
AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -2150,17 +2150,17 @@ define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm5[0],xmm4[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512VL-NEXT: retq @@ -2223,7 +2223,7 @@ define <2 x double> @load_cvt_2i16_to_2f64(<2 x i16>* %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: load_cvt_2i16_to_2f64: @@ -2236,7 +2236,7 @@ define <2 x double> @load_cvt_2i16_to_2f64(<2 x i16>* %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX2-NEXT: retq ; ; AVX512F-LABEL: load_cvt_2i16_to_2f64: @@ -2249,7 +2249,7 @@ define <2 x double> @load_cvt_2i16_to_2f64(<2 x i16>* %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -2263,7 +2263,7 @@ define <2 x double> @load_cvt_2i16_to_2f64(<2 x i16>* %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: retq %1 = load <2 x i16>, <2 x i16>* %a0 %2 = bitcast <2 x i16> %1 to <2 x half> @@ -2288,10 +2288,10 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, 
%xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -2311,10 +2311,10 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -2334,10 +2334,10 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2357,10 +2357,10 @@ define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %1 = load <4 x i16>, <4 x i16>* %a0 @@ -2392,10 +2392,10 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -2421,10 +2421,10 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX2-NEXT: retq ; @@ -2450,10 +2450,10 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 
+; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; @@ -2481,10 +2481,10 @@ define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq %1 = load <8 x i16>, <8 x i16>* %a0 @@ -2523,17 +2523,17 @@ define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind { ; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0] ; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 ; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 ; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX1-NEXT: retq ; @@ -2565,17 +2565,17 @@ define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind { ; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm5[0] ; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 ; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 ; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] ; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 ; AVX2-NEXT: retq ; @@ -2607,17 +2607,17 @@ define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind { ; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 ; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 -; AVX512F-NEXT: vunpcklpd {{.*#+}} 
xmm4 = xmm4[0],xmm5[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 ; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512F-NEXT: retq @@ -2650,17 +2650,17 @@ define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind { ; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 ; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 ; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm6 = xmm6[0],xmm7[0] ; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 ; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 ; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 ; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll index 7c7ab016664..991b3633cae 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll @@ -303,12 +303,12 @@ define <2 x double> @shuffle_v2f64_21(<2 x double> %a, <2 x double> %b) { define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> ret <2 x i64> %shuffle @@ -316,13 +316,13 @@ define <2 x i64> @shuffle_v2i64_02(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_02_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_02_copy: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_02_copy: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm2[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm2[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> ret <2 x i64> %shuffle @@ -502,13 +502,13 @@ define <2 x i64> @shuffle_v2i64_13_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64 define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: 
shuffle_v2i64_20: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0> ret <2 x i64> %shuffle @@ -516,13 +516,13 @@ define <2 x i64> @shuffle_v2i64_20(<2 x i64> %a, <2 x i64> %b) { define <2 x i64> @shuffle_v2i64_20_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64> %b) { ; SSE-LABEL: shuffle_v2i64_20_copy: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v2i64_20_copy: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm2[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 2, i32 0> ret <2 x i64> %shuffle @@ -832,20 +832,20 @@ define <2 x double> @shuffle_v2f64_z0(<2 x double> %a) { ; SSE-LABEL: shuffle_v2f64_z0: ; SSE: # BB#0: ; SSE-NEXT: xorps %xmm1, %xmm1 -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: shuffle_v2f64_z0: ; AVX1: # BB#0: ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v2f64_z0: ; AVX2: # BB#0: ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX2-NEXT: retq ; ; AVX512VL-LABEL: shuffle_v2f64_z0: @@ -1155,13 +1155,13 @@ define <2 x i64> @insert_mem_hi_v2i64(i64* %ptr, <2 x i64> %b) { ; SSE-LABEL: insert_mem_hi_v2i64: ; SSE: # BB#0: ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: insert_mem_hi_v2i64: ; AVX: # BB#0: ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %a = load i64, i64* %ptr %v = insertelement <2 x i64> undef, i64 %a, i32 0 @@ -1231,13 +1231,13 @@ define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) { define <2 x double> @insert_reg_hi_v2f64(double %a, <2 x double> %b) { ; SSE-LABEL: insert_reg_hi_v2f64: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_hi_v2f64: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %v = insertelement <2 x double> undef, double %a, i32 0 %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll index 418f8881c8a..32efb3cd703 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll @@ -303,12 +303,12 @@ define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> 
%b) { define <4 x float> @shuffle_v4f32_0145(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: shuffle_v4f32_0145: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4f32_0145: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ret <4 x float> %shuffle @@ -499,12 +499,12 @@ define <4 x i32> @shuffle_v4i32_4012(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_0145(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_0145: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_0145: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ret <4 x i32> %shuffle @@ -554,13 +554,13 @@ define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @shuffle_v4i32_4501(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: shuffle_v4i32_4501: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: shuffle_v4i32_4501: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> ret <4 x i32> %shuffle @@ -1825,7 +1825,7 @@ define <4 x float> @shuffle_v4f32_bitcast_4401(<4 x float> %a, <4 x i32> %b) { ; AVX512VL-LABEL: shuffle_v4f32_bitcast_4401: ; AVX512VL: # BB#0: ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,1,1] -; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512VL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512VL-NEXT: retq %1 = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 1, i32 1> %2 = bitcast <4 x i32> %1 to <2 x double> @@ -2207,13 +2207,13 @@ define <4 x i32> @insert_mem_hi_v4i32(<2 x i32>* %ptr, <4 x i32> %b) { ; SSE-LABEL: insert_mem_hi_v4i32: ; SSE: # BB#0: ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX1OR2-LABEL: insert_mem_hi_v4i32: ; AVX1OR2: # BB#0: ; AVX1OR2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1OR2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1OR2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1OR2-NEXT: retq ; ; AVX512VL-LABEL: insert_mem_hi_v4i32: @@ -2285,13 +2285,13 @@ define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) { define <4 x float> @insert_reg_hi_v4f32(double %a, <4 x float> %b) { ; SSE-LABEL: insert_reg_hi_v4f32: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; SSE-NEXT: movaps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: insert_reg_hi_v4f32: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX-NEXT: retq %a.cast = bitcast double %a to <2 x float> %v = shufflevector <2 x float> %a.cast, <2 
x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll index 1a7a5010257..7cabed05cec 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -430,7 +430,7 @@ define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) { ; AVX1-LABEL: shuffle_v4f64_0415: ; AVX1: # BB#0: ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -614,7 +614,7 @@ define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0020: ; AVX1: # BB#0: ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] ; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -869,7 +869,7 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0451: ; AVX1: # BB#0: ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -903,7 +903,7 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_4015: ; AVX1: # BB#0: ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -1066,7 +1066,7 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) { ; AVX1-LABEL: shuffle_v4i64_0415: ; AVX1: # BB#0: ; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1] -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 26e2226a614..c92001baeb4 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -1277,12 +1277,12 @@ define <4 x float> @combine_test2(<4 x float> %a, <4 x float> %b) { define <4 x float> @combine_test3(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: combine_test3: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test3: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; AVX-NEXT: retq %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7> %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1> @@ -1381,12 +1381,12 @@ define <4 x i32> @combine_test7(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @combine_test8(<4 x i32> %a, <4 x i32> %b) { ; SSE-LABEL: combine_test8: ; SSE: # BB#0: -; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_test8: ; AVX: # BB#0: -; AVX-NEXT: vunpcklpd 
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
 %2 = shufflevector <4 x i32> %1, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -1480,12 +1480,12 @@ define <4 x float> @combine_test12(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_test13(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_test13:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_test13:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -1578,12 +1578,12 @@ define <4 x i32> @combine_test17(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i32> @combine_test18(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: combine_test18:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_test18:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
 %2 = shufflevector <4 x i32> %1, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -1641,7 +1641,7 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
 ; SSE-LABEL: combine_test21:
 ; SSE: # BB#0:
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm1[0]
 ; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; SSE-NEXT: movaps %xmm2, (%rdi)
 ; SSE-NEXT: retq
@@ -1649,7 +1649,7 @@ define <4 x i32> @combine_test21(<8 x i32> %a, <4 x i32>* %ptr) {
 ; AVX-LABEL: combine_test21:
 ; AVX: # BB#0:
 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm0[0],xmm1[0]
 ; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; AVX-NEXT: vmovaps %xmm2, (%rdi)
 ; AVX-NEXT: vzeroupper
@@ -2168,12 +2168,12 @@ define <4 x float> @combine_undef_input_test1(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test2:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test2:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
@@ -2183,12 +2183,12 @@ define <4 x float> @combine_undef_input_test2(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test3(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test3:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test3:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
 %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
@@ -2352,12 +2352,12 @@ define <4 x float> @combine_undef_input_test11(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test12:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test12:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 6, i32 0, i32 1, i32 7>
 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 5, i32 6, i32 0, i32 1>
@@ -2367,12 +2367,12 @@ define <4 x float> @combine_undef_input_test12(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @combine_undef_input_test13(<4 x float> %a, <4 x float> %b) {
 ; SSE-LABEL: combine_undef_input_test13:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: combine_undef_input_test13:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %1 = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 0, i32 5, i32 1, i32 7>
 %2 = shufflevector <4 x float> %b, <4 x float> %1, <4 x i32> <i32 4, i32 5, i32 0, i32 5>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
index 3dc9e3f7217..611dab4c942 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-128.ll
@@ -44,7 +44,7 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1)
 ; SSE-NEXT: andl $1, %esi
 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: var_shuffle_v2i64_v2i64_xx_i64:
@@ -56,7 +56,7 @@ define <2 x i64> @var_shuffle_v2i64_v2i64_xx_i64(<2 x i64> %x, i32 %i0, i32 %i1)
 ; AVX-NEXT: andl $1, %esi
 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; AVX-NEXT: retq
 %x0 = extractelement <2 x i64> %x, i32 %i0
 %x1 = extractelement <2 x i64> %x, i32 %i1
@@ -83,7 +83,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -103,7 +103,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_xxxx_i32(<4 x float> %x, i32 %i0, i3
 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: var_shuffle_v4f32_v4f32_xxxx_i32:
@@ -168,7 +168,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -188,7 +188,7 @@ define <4 x i32> @var_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32 %i0, i32 %i
 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: var_shuffle_v4i32_v4i32_xxxx_i32:
@@ -739,7 +739,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -759,7 +759,7 @@ define <4 x i32> @mem_shuffle_v4i32_v4i32_xxxx_i32(<4 x i32> %x, i32* %i) nounwi
 ; SSSE3-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSSE3-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
-; SSSE3-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSSE3-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: mem_shuffle_v4i32_v4i32_xxxx_i32:
@@ -1180,7 +1180,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float>
 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: var_shuffle_v4f32_v4f32_x0yx_i32:
@@ -1197,7 +1197,7 @@ define <4 x float> @var_shuffle_v4f32_v4f32_x0yx_i32(<4 x float> %x, <4 x float>
 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
 ; AVX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT: retq
 %x0 = extractelement <4 x float> %x, i32 %i0
 %x1 = extractelement <4 x float> %x, i32 %i1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
index 5320050051a..672ccdce50e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
@@ -104,10 +104,10 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i
 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT: movq %rbp, %rsp
 ; ALL-NEXT: popq %rbp
@@ -135,7 +135,7 @@ define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i
 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT: vmovaps %xmm0, %xmm0
 ; ALL-NEXT: movq %rbp, %rsp
 ; ALL-NEXT: popq %rbp
@@ -161,10 +161,10 @@ define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i
 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT: retq
 %x0 = extractelement <2 x i64> %x, i64 %i0
@@ -610,10 +610,10 @@ define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwi
 ; ALL-NEXT: vmovaps %ymm0, (%rsp)
 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT: movq %rbp, %rsp
 ; ALL-NEXT: popq %rbp
@@ -651,10 +651,10 @@ define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwi
 ; ALL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
 ; ALL-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; ALL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
+; ALL-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; ALL-NEXT: retq
 %p0 = getelementptr inbounds i64, i64* %i, i32 0
diff --git a/llvm/test/CodeGen/X86/vector-truncate-combine.ll b/llvm/test/CodeGen/X86/vector-truncate-combine.ll
index 61808b80251..de68f6b7059 100644
--- a/llvm/test/CodeGen/X86/vector-truncate-combine.ll
+++ b/llvm/test/CodeGen/X86/vector-truncate-combine.ll
@@ -14,7 +14,7 @@
 ; NOTE: This operation is collapsed to a single truncate, so this test no longer covers
 ; what it originally intended to.
-; CHECK: MOVLHPSrr
+; CHECK: PUNPCKLQDQrr
 ; CHECK: PSHUFHWri
 ; CHECK: PACKUSWBrr
 ; CHECK: PACKUSWBrr
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index e0eed2a6ed8..e7bb0c02fc1 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -462,15 +462,15 @@ define <2 x i64> @test25(<2 x i64> %a, <2 x i64> %b) {
 define <4 x float> @select_of_shuffles_0(<2 x float> %a0, <2 x float> %b0, <2 x float> %a1, <2 x float> %b1) {
 ; SSE-LABEL: select_of_shuffles_0:
 ; SSE: # BB#0:
-; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; SSE-NEXT: subps %xmm1, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: select_of_shuffles_0:
 ; AVX: # BB#0:
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX-NEXT: vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
 ; AVX-NEXT: vsubps %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
 %1 = shufflevector <2 x float> %a0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/widen_extract-1.ll b/llvm/test/CodeGen/X86/widen_extract-1.ll
index 760c33f5c86..d75fedc3203 100644
--- a/llvm/test/CodeGen/X86/widen_extract-1.ll
+++ b/llvm/test/CodeGen/X86/widen_extract-1.ll
@@ -14,7 +14,7 @@ define void @convert(<2 x double>* %dst.addr, <3 x double> %src) {
 ;
 ; X64-LABEL: convert:
 ; X64: # BB#0: # %entry
-; X64-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; X64-NEXT: movaps %xmm0, (%rdi)
 ; X64-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/xop-mask-comments.ll b/llvm/test/CodeGen/X86/xop-mask-comments.ll
index dadb0103e13..665bcaae777 100644
--- a/llvm/test/CodeGen/X86/xop-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/xop-mask-comments.ll
@@ -101,13 +101,13 @@ define <2 x double> @vpermil2pd_21(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: vpermil2pd_21:
 ; X32: # BB#0:
 ; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X32-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: vpermil2pd_21:
 ; X64: # BB#0:
 ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; X64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; X64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
 ; X64-NEXT: retq
 %1 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 10, i64 1>, i8 2)
 ret <2 x double> %1