Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll              |  28
-rw-r--r--  llvm/test/CodeGen/X86/avx512dq-intrinsics.ll            |  24
-rw-r--r--  llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll          | 120
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll  |  72
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll    |  36
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll            |  12
6 files changed, 112 insertions(+), 180 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 551e3935449..e94f16a5629 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4322,21 +4322,17 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i
 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
 ; X64: # %bb.0:
-; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2dq %zmm0, %ymm2
-; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X64-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
 ; X64-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_512:
 ; X86: # %bb.0:
-; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2dq %zmm0, %ymm2
-; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X86-NEXT: vcvttpd2dq %zmm0, %ymm1 {%k1}
 ; X86-NEXT: vcvttpd2dq {sae}, %zmm0, %ymm0
 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
@@ -4377,21 +4373,17 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>,
 define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) {
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
 ; X64: # %bb.0:
-; X64-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2udq %zmm0, %ymm2
-; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X64-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
 ; X64-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X64-NEXT: retq
 ;
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_512:
 ; X86: # %bb.0:
-; X86-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2udq %zmm0, %ymm2
-; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X86-NEXT: vcvttpd2udq %zmm0, %ymm1 {%k1}
 ; X86-NEXT: vcvttpd2udq {sae}, %zmm0, %ymm0
 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0
 ; X86-NEXT: retl
@@ -4407,8 +4399,7 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2dq %zmm0, %zmm2
-; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X64-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
 ; X64-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0
 ; X64-NEXT: retq
@@ -4416,8 +4407,7 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vcvttps2dq %zmm0, %zmm2
-; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X86-NEXT: vcvttps2dq %zmm0, %zmm1 {%k1}
 ; X86-NEXT: vcvttps2dq {sae}, %zmm0, %zmm0
 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
 ; X86-NEXT: retl
@@ -4433,8 +4423,7 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %zmm0, %zmm2
-; X64-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X64-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
 ; X64-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
 ; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0
 ; X64-NEXT: retq
@@ -4442,8 +4431,7 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
-; X86-NEXT: vcvttps2udq %zmm0, %zmm2
-; X86-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
+; X86-NEXT: vcvttps2udq %zmm0, %zmm1 {%k1}
 ; X86-NEXT: vcvttps2udq {sae}, %zmm0, %zmm0
 ; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0
 ; X86-NEXT: retl
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index fc04ae835fb..506db572671 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -286,8 +286,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X86-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
 ; X86-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
 ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -295,8 +294,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_512(<8 x double> %x0, <8 x
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvttpd2qq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x7a,0xd0]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X64-NEXT: vcvttpd2qq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x7a,0xc8]
 ; X64-NEXT: vcvttpd2qq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x7a,0xc0]
 ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -312,8 +310,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X86-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
 ; X86-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
 ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -321,8 +318,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_512(<8 x double> %x0, <8 x
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvttpd2uqq %zmm0, %zmm2 # encoding: [0x62,0xf1,0xfd,0x48,0x78,0xd0]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X64-NEXT: vcvttpd2uqq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x78,0xc8]
 ; X64-NEXT: vcvttpd2uqq {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x18,0x78,0xc0]
 ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -338,8 +334,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X86-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
 ; X86-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
 ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -347,8 +342,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_512(<8 x float> %x0, <8 x i
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvttps2qq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x7a,0xd0]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X64-NEXT: vcvttps2qq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x7a,0xc8]
 ; X64-NEXT: vcvttps2qq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x7a,0xc0]
 ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -364,8 +358,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
 ; X86: # %bb.0:
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X86-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
 ; X86-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
 ; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -373,8 +366,7 @@ define <8 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_512(<8 x float> %x0, <8 x
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_512:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vcvttps2uqq %ymm0, %zmm2 # encoding: [0x62,0xf1,0x7d,0x48,0x78,0xd0]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x49,0x6f,0xca]
+; X64-NEXT: vcvttps2uqq %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x78,0xc8]
 ; X64-NEXT: vcvttps2uqq {sae}, %ymm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x78,0xc0]
 ; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index 5afbf5b6720..431d6f9d28f 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -599,17 +599,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i
 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
+; X86-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
+; X86-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7a,0xc8]
 ; X64-NEXT: vcvttpd2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x7a,0xc0]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
@@ -623,17 +623,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i
 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2qq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
+; X86-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2qq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2qq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7a,0xc8]
 ; X64-NEXT: vcvttpd2qq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x7a,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
@@ -647,17 +647,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>,
 define <2 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_128(<2 x double> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
+; X86-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
+; X86-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x78,0xc8]
 ; X64-NEXT: vcvttpd2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x08,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x0, <2 x i64> %x1, i8 %x2)
@@ -671,17 +671,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>,
 define <4 x i64>@test_int_x86_avx512_mask_cvtt_pd2uqq_256(<4 x double> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
+; X86-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2uqq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2uqq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x78,0xc8]
 ; X64-NEXT: vcvttpd2uqq %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x0, <4 x i64> %x1, i8 %x2)
@@ -695,17 +695,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float>, <2 x i64>, i8
 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2qq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0xc8]
 ; X64-NEXT: vcvttps2qq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0xc0]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
@@ -735,16 +735,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -756,16 +754,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -795,16 +791,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_2:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -816,16 +810,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2(<2 x float>* %
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_2:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -853,16 +845,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3(<4 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_128_load_3:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <4 x float>, <4 x float>* %p
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
@@ -873,16 +863,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3(<4 x float>* %
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2qq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2qq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2qq_128_load_3:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2qq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x7a,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2qq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x7a,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <4 x float>, <4 x float>* %p
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2qq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
@@ -894,17 +882,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8
 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2qq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
+; X86-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2qq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2qq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7a,0xc8]
 ; X64-NEXT: vcvttps2qq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x7a,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
@@ -995,17 +983,17 @@ declare <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float>, <2 x i64>, i
 define <2 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_128(<4 x float> %x0, <2 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
 ; X86-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2uqq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0xc8]
 ; X64-NEXT: vcvttps2uqq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %x1, i8 %x2)
@@ -1035,16 +1023,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load(<2 x float>* %p,
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1056,16 +1042,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load(<2 x float>* %p
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1095,16 +1079,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2(<2 x float>* %
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_2:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1116,16 +1098,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2(<2 x float>*
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_2:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <2 x float>, <2 x float>* %p
 %x0b = shufflevector <2 x float> %x0, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -1153,16 +1133,14 @@ define <2 x i64> @test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3(<4 x float>* %
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x08]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_128_load_3:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm1 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x0f]
-; X64-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0xc1]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <4 x float>, <4 x float>* %p
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> %passthru, i8 %mask)
@@ -1173,16 +1151,14 @@ define <2 x i64> @test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3(<4 x float>*
 ; X86-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vcvttps2uqq (%eax), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x00]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X86-NEXT: vcvttps2uqq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x00]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_maskz_cvtt_ps2uqq_128_load_3:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 # encoding: [0x62,0xf1,0x7d,0x08,0x78,0x07]
-; X64-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0xc0]
+; X64-NEXT: vcvttps2uqq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x78,0x07]
 ; X64-NEXT: retq # encoding: [0xc3]
 %x0 = load <4 x float>, <4 x float>* %p
 %res = call <2 x i64> @llvm.x86.avx512.mask.cvttps2uqq.128(<4 x float> %x0, <2 x i64> zeroinitializer, i8 %mask)
@@ -1194,17 +1170,17 @@ declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i
 define <4 x i64>@test_int_x86_avx512_mask_cvtt_ps2uqq_256(<4 x float> %x0, <4 x i64> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
 ; X86-NEXT: kmovb {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
+; X86-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
 ; X86-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2uqq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2uqq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x78,0xc8]
 ; X64-NEXT: vcvttps2uqq %xmm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xc8]
 ; X64-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x0, <4 x i64> %x1, i8 %x2)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
index 2337745bdd3..82a19ba41cd 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
@@ -682,17 +682,15 @@ define <2 x i64> @test_mm256_mask_cvttpd_epi32(<2 x i64> %__W, i8 zeroext %__U,
 ; X86-LABEL: test_mm256_mask_cvttpd_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttpd2dq %ymm1, %xmm1
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
 ; X86-NEXT: vzeroupper
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_cvttpd_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttpd2dq %ymm1, %xmm1
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttpd2dq %ymm1, %xmm0 {%k1}
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -709,17 +707,15 @@ define <2 x i64> @test_mm256_maskz_cvttpd_epi32(i8 zeroext %__U, <4 x double> %_
 ; X86-LABEL: test_mm256_maskz_cvttpd_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttpd2dq %ymm0, %xmm0
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT: vzeroupper
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskz_cvttpd_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttpd2dq %ymm0, %xmm0
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 {%k1} {z}
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -798,16 +794,14 @@ define <2 x i64> @test_mm256_mask_cvttpd_epu32(<2 x i64> %__W, i8 zeroext %__U,
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2udq %ymm1, %xmm1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
 ; X86-NEXT: vzeroupper
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_cvttpd_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2udq %ymm1, %xmm1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttpd2udq %ymm1, %xmm0 {%k1}
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -822,16 +816,14 @@ define <2 x i64> @test_mm256_maskz_cvttpd_epu32(i8 zeroext %__U, <4 x double> %_
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttpd2udq %ymm0, %xmm0
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
 ; X86-NEXT: vzeroupper
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskz_cvttpd_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttpd2udq %ymm0, %xmm0
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 {%k1} {z}
 ; X64-NEXT: vzeroupper
 ; X64-NEXT: retq
 entry:
@@ -844,16 +836,14 @@ define <2 x i64> @test_mm_mask_cvttps_epi32(<2 x i64> %__W, i8 zeroext %__U, <4
 ; X86-LABEL: test_mm_mask_cvttps_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %xmm1, %xmm1
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_cvttps_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %xmm1, %xmm1
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttps2dq %xmm1, %xmm0 {%k1}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
@@ -869,16 +859,14 @@ define <2 x i64> @test_mm_maskz_cvttps_epi32(i8 zeroext %__U, <4 x float> %__A)
 ; X86-LABEL: test_mm_maskz_cvttps_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %xmm0, %xmm0
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskz_cvttps_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %xmm0, %xmm0
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttps2dq %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %__A) #8
@@ -893,16 +881,14 @@ define <4 x i64> @test_mm256_mask_cvttps_epi32(<4 x i64> %__W, i8 zeroext %__U,
 ; X86-LABEL: test_mm256_mask_cvttps_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %ymm1, %ymm1
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X86-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_cvttps_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %ymm1, %ymm1
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X64-NEXT: vcvttps2dq %ymm1, %ymm0 {%k1}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
@@ -917,16 +903,14 @@ define <4 x i64> @test_mm256_maskz_cvttps_epi32(i8 zeroext %__U, <8 x float> %__
 ; X86-LABEL: test_mm256_maskz_cvttps_epi32:
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vcvttps2dq %ymm0, %ymm0
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskz_cvttps_epi32:
 ; X64: # %bb.0: # %entry
-; X64-NEXT: vcvttps2dq %ymm0, %ymm0
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vcvttps2dq %ymm0, %ymm0 {%k1} {z}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> %__A) #8
@@ -952,15 +936,13 @@ define <2 x i64> @test_mm_mask_cvttps_epu32(<2 x i64> %__W, i8 zeroext %__U, <4
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %xmm1, %xmm1
-; X86-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X86-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_mask_cvttps_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %xmm1, %xmm1
-; X64-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1}
+; X64-NEXT: vcvttps2udq %xmm1, %xmm0 {%k1}
 ; X64-NEXT: retq
 entry:
 %0 = bitcast <2 x i64> %__W to <4 x i32>
@@ -974,15 +956,13 @@ define <2 x i64> @test_mm_maskz_cvttps_epu32(i8 zeroext %__U, <4 x float> %__A)
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %xmm0, %xmm0
-; X86-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X86-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm_maskz_cvttps_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %xmm0, %xmm0
-; X64-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; X64-NEXT: vcvttps2udq %xmm0, %xmm0 {%k1} {z}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %__A, <4 x i32> zeroinitializer, i8 %__U) #8
@@ -1006,15 +986,13 @@ define <4 x i64> @test_mm256_mask_cvttps_epu32(<4 x i64> %__W, i8 zeroext %__U,
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %ymm1, %ymm1
-; X86-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X86-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_mask_cvttps_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %ymm1, %ymm1
-; X64-NEXT: vmovdqa32 %ymm1, %ymm0 {%k1}
+; X64-NEXT: vcvttps2udq %ymm1, %ymm0 {%k1}
 ; X64-NEXT: retq
 entry:
 %0 = bitcast <4 x i64> %__W to <8 x i32>
@@ -1028,15 +1006,13 @@ define <4 x i64> @test_mm256_maskz_cvttps_epu32(i8 zeroext %__U, <8 x float> %__
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vcvttps2udq %ymm0, %ymm0
-; X86-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X86-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_mm256_maskz_cvttps_epu32:
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vcvttps2udq %ymm0, %ymm0
-; X64-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; X64-NEXT: vcvttps2udq %ymm0, %ymm0 {%k1} {z}
 ; X64-NEXT: retq
 entry:
 %0 = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %__A, <8 x i32> zeroinitializer, i8 %__U) #8
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
index a5658036565..28c4111747c 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
@@ -10374,20 +10374,20 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i
 define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X86-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
+; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
 ; X64: # %bb.0:
-; X64-NEXT: vcvttpd2dq %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xc0]
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X64-NEXT: vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
+; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
@@ -10401,19 +10401,19 @@ declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8
 define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
+; X86-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X86-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
+; X86-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
 ; X64: # %bb.0:
-; X64-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0]
+; X64-NEXT: vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
-; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
+; X64-NEXT: vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
+; X64-NEXT: vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
 %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
@@ -10426,19 +10426,19 @@ declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8
 define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
 ; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
 ; X86: # %bb.0:
-; X86-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
+; X86-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
-; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; X86-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
+; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
 ; X64: # %bb.0:
-; X64-NEXT: vcvttps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xc0]
+; X64-NEXT: vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
-; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
+; X64-NEXT: vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
+; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
 %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index 9dfde40e8e5..b5dfa2f11ad 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -3704,8 +3704,8 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
 ; X86-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X86-NEXT: retl # encoding: [0xc3]
@@ -3713,8 +3713,8 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_256(<4 x double> %x0, <4 x
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2udq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttpd2udq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfc,0x29,0x78,0xc8]
 ; X64-NEXT: vcvttpd2udq %ymm0, %xmm0 # encoding: [0x62,0xf1,0xfc,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
 ; X64-NEXT: retq # encoding: [0xc3]
@@ -3731,16 +3731,16 @@ define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_128(<4 x float> %x0, <4 x
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
 ; X86-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
 ; X86-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_128:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2udq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x78,0xc8]
 ; X64-NEXT: vcvttps2udq %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7c,0x08,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0xc8]
 ; X64-NEXT: vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
@@ -3756,16 +3756,16 @@ define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_256(<8 x float> %x0, <8 x
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
 ; X86-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
-; X86-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
 ; X86-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2udq_256:
 ; X64: # %bb.0:
 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X64-NEXT: vcvttps2udq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x78,0xc8]
 ; X64-NEXT: vcvttps2udq %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7c,0x28,0x78,0xc0]
-; X64-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xc8]
 ; X64-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)