diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-02-19 22:07:31 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-02-19 22:07:31 +0000 |
| commit | b195ed8ce3ce2f31684b275640b9f7abaad8eeac (patch) | |
| tree | f8c4b087897cdb0cf9340b6866f10bf2d441bc2f | |
| parent | 1d14779aed200632328b38b04a89575b0a89c38b (diff) | |
| download | bcm5719-llvm-b195ed8ce3ce2f31684b275640b9f7abaad8eeac.tar.gz bcm5719-llvm-b195ed8ce3ce2f31684b275640b9f7abaad8eeac.zip | |
[X86] Use vpmovq2m/vpmovd2m for truncate to vXi1 when possible.
Previously we used vptestmd/vptestmq, but the scheduling data for SKX says vpmovq2m/vpmovd2m have lower latency. We already use vpmovb2m/vpmovw2m for byte/word truncates, so this is more consistent anyway.
llvm-svn: 325534
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-cvt.ll | 667 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-ext.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-mask-op.ll | 50 | ||||
| -rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 96 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll | 28 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-vselect.ll | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/compress_expand.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/masked_gather_scatter.ll | 96 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/pr33349.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/required-vector-width.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-compare-results.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-v1.ll | 6 |
14 files changed, 648 insertions, 386 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b8b3fbec732..c6916fdf1a1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16762,6 +16762,10 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, In = DAG.getNode(ISD::SHL, DL, InVT, In, DAG.getConstant(ShiftInx, DL, InVT)); } + // If we have DQI, emit a pattern that will be iseled as vpmovq2m/vpmovd2m. + if (Subtarget.hasDQI()) + return DAG.getNode(X86ISD::CMPM, DL, VT, DAG.getConstant(0, DL, InVT), + In, DAG.getConstant(6, DL, MVT::i8)); return DAG.getNode(X86ISD::CMPM, DL, VT, In, getZeroVector(InVT, Subtarget, DAG, DL), DAG.getConstant(4, DL, MVT::i8)); diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index 6d18012834f..1a277d811ce 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -715,23 +715,41 @@ define <4 x float> @f64to4f32(<4 x double> %b) { } define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { -; NOVL-LABEL: f64to4f32_mask: -; NOVL: # %bb.0: -; NOVL-NEXT: vpslld $31, %xmm1, %xmm1 -; NOVL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; NOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 -; NOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; NOVL-NEXT: vzeroupper -; NOVL-NEXT: retq +; NOVLDQ-LABEL: f64to4f32_mask: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vpslld $31, %xmm1, %xmm1 +; NOVLDQ-NEXT: vptestmd %zmm1, %zmm1, %k1 +; NOVLDQ-NEXT: vcvtpd2ps %ymm0, %xmm0 +; NOVLDQ-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; -; VL-LABEL: f64to4f32_mask: -; VL: # %bb.0: -; VL-NEXT: vpslld $31, %xmm1, %xmm1 -; VL-NEXT: vptestmd %xmm1, %xmm1, %k1 -; VL-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} -; VL-NEXT: vzeroupper -; VL-NEXT: retq +; VLDQ-LABEL: f64to4f32_mask: +; VLDQ: # %bb.0: +; VLDQ-NEXT: 
vpslld $31, %xmm1, %xmm1 +; VLDQ-NEXT: vpmovd2m %xmm1, %k1 +; VLDQ-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +; VLDQ-NEXT: vzeroupper +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: f64to4f32_mask: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vpslld $31, %xmm1, %xmm1 +; VLNODQ-NEXT: vptestmd %xmm1, %xmm1, %k1 +; VLNODQ-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} +; VLNODQ-NEXT: vzeroupper +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: f64to4f32_mask: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vpslld $31, %xmm1, %xmm1 +; DQNOVL-NEXT: vpmovd2m %zmm1, %k1 +; DQNOVL-NEXT: vcvtpd2ps %ymm0, %xmm0 +; DQNOVL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; DQNOVL-NEXT: vzeroupper +; DQNOVL-NEXT: retq %a = fptrunc <4 x double> %b to <4 x float> %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer ret <4 x float> %c @@ -2041,288 +2059,521 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { } define <2 x i64> @test_2f64toub(<2 x double> %a, <2 x i64> %passthru) { -; NOVL-LABEL: test_2f64toub: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0 -; NOVL-NEXT: vpslld $31, %ymm0, %ymm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; NOVL-NEXT: vzeroupper -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_2f64toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; NOVLDQ-NEXT: vcvttpd2udq %zmm0, %ymm0 +; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_2f64toub: -; VL: # %bb.0: -; VL-NEXT: vcvttpd2udq %xmm0, %xmm0 -; VL-NEXT: vpslld $31, %xmm0, %xmm0 -; VL-NEXT: 
vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_2f64toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_2f64toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2udq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_2f64toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; DQNOVL-NEXT: vcvttpd2udq %zmm0, %ymm0 +; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; DQNOVL-NEXT: vzeroupper +; DQNOVL-NEXT: retq %mask = fptoui <2 x double> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select } define <4 x i64> @test_4f64toub(<4 x double> %a, <4 x i64> %passthru) { -; NOVL-LABEL: test_4f64toub: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 -; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_4f64toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_4f64toub: -; VL: # %bb.0: -; VL-NEXT: 
vcvttpd2dq %ymm0, %xmm0 -; VL-NEXT: vpslld $31, %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_4f64toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_4f64toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_4f64toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; DQNOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; DQNOVL-NEXT: retq %mask = fptoui <4 x double> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f64toub(<8 x double> %a, <8 x i64> %passthru) { -; NOVL-LABEL: test_8f64toub: -; NOVL: # %bb.0: -; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; NOVL-NEXT: vpslld $31, %ymm0, %ymm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_8f64toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_8f64toub: -; VL: # %bb.0: -; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; VL-NEXT: vpslld $31, %ymm0, %ymm0 -; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 -; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_8f64toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 
+; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 +; VLDQ-NEXT: vpmovd2m %ymm0, %k1 +; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_8f64toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 +; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_8f64toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptoui <8 x double> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <2 x i64> @test_2f32toub(<2 x float> %a, <2 x i64> %passthru) { -; NOVL-LABEL: test_2f32toub: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 -; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; NOVL-NEXT: vzeroupper -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_2f32toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_2f32toub: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; VL-NEXT: vpslld $31, %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_2f32toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; 
VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_2f32toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_2f32toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; DQNOVL-NEXT: vzeroupper +; DQNOVL-NEXT: retq %mask = fptoui <2 x float> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select } define <4 x i64> @test_4f32toub(<4 x float> %a, <4 x i64> %passthru) { -; NOVL-LABEL: test_4f32toub: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 -; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_4f32toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_4f32toub: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; VL-NEXT: vpslld $31, %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_4f32toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, 
%k1 +; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_4f32toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_4f32toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; DQNOVL-NEXT: retq %mask = fptoui <4 x float> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f32toub(<8 x float> %a, <8 x i64> %passthru) { -; NOVL-LABEL: test_8f32toub: -; NOVL: # %bb.0: -; NOVL-NEXT: vcvttps2dq %ymm0, %ymm0 -; NOVL-NEXT: vpslld $31, %ymm0, %ymm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_8f32toub: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; NOVLDQ-NEXT: vpslld $31, %ymm0, %ymm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_8f32toub: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %ymm0, %ymm0 -; VL-NEXT: vpslld $31, %ymm0, %ymm0 -; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 -; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_8f32toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLDQ-NEXT: vpslld $31, %ymm0, %ymm0 +; VLDQ-NEXT: vpmovd2m %ymm0, %k1 +; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_8f32toub: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLNODQ-NEXT: vpslld $31, %ymm0, %ymm0 +; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 +; 
VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_8f32toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; DQNOVL-NEXT: vpslld $31, %ymm0, %ymm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptoui <8 x float> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <16 x i32> @test_16f32toub(<16 x float> %a, <16 x i32> %passthru) { -; ALL-LABEL: test_16f32toub: -; ALL: # %bb.0: -; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 -; ALL-NEXT: vpslld $31, %zmm0, %zmm0 -; ALL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; ALL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} -; ALL-NEXT: retq +; NODQ-LABEL: test_16f32toub: +; NODQ: # %bb.0: +; NODQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; NODQ-NEXT: vpslld $31, %zmm0, %zmm0 +; NODQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NODQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; NODQ-NEXT: retq +; +; VLDQ-LABEL: test_16f32toub: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; VLDQ-NEXT: vpslld $31, %zmm0, %zmm0 +; VLDQ-NEXT: vpmovd2m %zmm0, %k1 +; VLDQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; DQNOVL-LABEL: test_16f32toub: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttps2dq %zmm0, %zmm0 +; DQNOVL-NEXT: vpslld $31, %zmm0, %zmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptoui <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer ret <16 x i32> %select } define <2 x i64> @test_2f64tosb(<2 x double> %a, <2 x i64> %passthru) { -; NOVL-LABEL: test_2f64tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; NOVL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; NOVL-NEXT: vpslld $31, %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 
killed $zmm0 -; NOVL-NEXT: vzeroupper -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_2f64tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_2f64tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttpd2dq %xmm0, %xmm0 -; VL-NEXT: vpslld $31, %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_2f64tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_2f64tosb: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vpslld $31, %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_2f64tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; DQNOVL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpslld $31, %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; DQNOVL-NEXT: vzeroupper +; DQNOVL-NEXT: retq %mask = fptosi <2 x double> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select } define <4 x i64> @test_4f64tosb(<4 x double> %a, <4 x i64> %passthru) { -; NOVL-LABEL: test_4f64tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; NOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: 
def $ymm0 killed $ymm0 killed $zmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_4f64tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_4f64tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttpd2dq %ymm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_4f64tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_4f64tosb: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2dq %ymm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_4f64tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; DQNOVL-NEXT: vcvttpd2dq %ymm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; DQNOVL-NEXT: retq %mask = fptosi <4 x double> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f64tosb(<8 x double> %a, <8 x i64> %passthru) { -; NOVL-LABEL: test_8f64tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_8f64tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_8f64tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttpd2dq %zmm0, %ymm0 -; VL-NEXT: vptestmd %ymm0, 
%ymm0, %k1 -; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_8f64tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VLDQ-NEXT: vpmovd2m %ymm0, %k1 +; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_8f64tosb: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttpd2dq %zmm0, %ymm0 +; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_8f64tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttpd2dq %zmm0, %ymm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptosi <8 x double> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <2 x i64> @test_2f32tosb(<2 x float> %a, <2 x i64> %passthru) { -; NOVL-LABEL: test_2f32tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 -; NOVL-NEXT: vzeroupper -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_2f32tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; NOVLDQ-NEXT: vzeroupper +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_2f32tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_2f32tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_2f32tosb: +; VLNODQ: # %bb.0: +; 
VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %xmm1, %xmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_2f32tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; DQNOVL-NEXT: vzeroupper +; DQNOVL-NEXT: retq %mask = fptosi <2 x float> %a to <2 x i1> %select = select <2 x i1> %mask, <2 x i64> %passthru, <2 x i64> zeroinitializer ret <2 x i64> %select } define <4 x i64> @test_4f32tosb(<4 x float> %a, <4 x i64> %passthru) { -; NOVL-LABEL: test_4f32tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 -; NOVL-NEXT: vcvttps2dq %xmm0, %xmm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_4f32tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; NOVLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_4f32tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %xmm0, %xmm0 -; VL-NEXT: vptestmd %xmm0, %xmm0, %k1 -; VL-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_4f32tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLDQ-NEXT: vpmovd2m %xmm0, %k1 +; VLDQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_4f32tosb: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttps2dq %xmm0, %xmm0 +; VLNODQ-NEXT: vptestmd %xmm0, %xmm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_4f32tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: # kill: def 
$ymm1 killed $ymm1 def $zmm1 +; DQNOVL-NEXT: vcvttps2dq %xmm0, %xmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; DQNOVL-NEXT: retq %mask = fptosi <4 x float> %a to <4 x i1> %select = select <4 x i1> %mask, <4 x i64> %passthru, <4 x i64> zeroinitializer ret <4 x i64> %select } define <8 x i64> @test_8f32tosb(<8 x float> %a, <8 x i64> %passthru) { -; NOVL-LABEL: test_8f32tosb: -; NOVL: # %bb.0: -; NOVL-NEXT: vcvttps2dq %ymm0, %ymm0 -; NOVL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; NOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; NOVL-NEXT: retq +; NOVLDQ-LABEL: test_8f32tosb: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; NOVLDQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NOVLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; NOVLDQ-NEXT: retq ; -; VL-LABEL: test_8f32tosb: -; VL: # %bb.0: -; VL-NEXT: vcvttps2dq %ymm0, %ymm0 -; VL-NEXT: vptestmd %ymm0, %ymm0, %k1 -; VL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} -; VL-NEXT: retq +; VLDQ-LABEL: test_8f32tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLDQ-NEXT: vpmovd2m %ymm0, %k1 +; VLDQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; VLNODQ-LABEL: test_8f32tosb: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vcvttps2dq %ymm0, %ymm0 +; VLNODQ-NEXT: vptestmd %ymm0, %ymm0, %k1 +; VLNODQ-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; VLNODQ-NEXT: retq +; +; DQNOVL-LABEL: test_8f32tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttps2dq %ymm0, %ymm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa64 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptosi <8 x float> %a to <8 x i1> %select = select <8 x i1> %mask, <8 x i64> %passthru, <8 x i64> zeroinitializer ret <8 x i64> %select } define <16 x i32> @test_16f32tosb(<16 x float> %a, <16 x i32> %passthru) { -; ALL-LABEL: test_16f32tosb: -; ALL: # %bb.0: -; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 -; ALL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; ALL-NEXT: vmovdqa32 
%zmm1, %zmm0 {%k1} {z} -; ALL-NEXT: retq +; NODQ-LABEL: test_16f32tosb: +; NODQ: # %bb.0: +; NODQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; NODQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; NODQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; NODQ-NEXT: retq +; +; VLDQ-LABEL: test_16f32tosb: +; VLDQ: # %bb.0: +; VLDQ-NEXT: vcvttps2dq %zmm0, %zmm0 +; VLDQ-NEXT: vpmovd2m %zmm0, %k1 +; VLDQ-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; VLDQ-NEXT: retq +; +; DQNOVL-LABEL: test_16f32tosb: +; DQNOVL: # %bb.0: +; DQNOVL-NEXT: vcvttps2dq %zmm0, %zmm0 +; DQNOVL-NEXT: vpmovd2m %zmm0, %k1 +; DQNOVL-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} {z} +; DQNOVL-NEXT: retq %mask = fptosi <16 x float> %a to <16 x i1> %select = select <16 x i1> %mask, <16 x i32> %passthru, <16 x i32> zeroinitializer ret <16 x i32> %select diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll index 128855313ab..a87b22e4e95 100644 --- a/llvm/test/CodeGen/X86/avx512-ext.ll +++ b/llvm/test/CodeGen/X86/avx512-ext.ll @@ -308,7 +308,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; SKX-LABEL: zext_4x8mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -330,7 +330,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; SKX-LABEL: sext_4x8mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -498,7 +498,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; SKX-LABEL: zext_2x8mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; 
SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero ; SKX-NEXT: retq %a = load <2 x i8>,<2 x i8> *%i,align 1 @@ -519,7 +519,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; SKX-LABEL: sext_2x8mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %a = load <2 x i8>,<2 x i8> *%i,align 1 @@ -550,7 +550,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; SKX-LABEL: zext_4x8mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -572,7 +572,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; SKX-LABEL: sext_4x8mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -656,7 +656,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; SKX-LABEL: zext_4x16mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -678,7 +678,7 @@ define <4 x i32> 
@sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; SKX-LABEL: sext_4x16mem_to_4x32mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -878,7 +878,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; SKX-LABEL: zext_2x16mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; SKX-NEXT: retq %a = load <2 x i16>,<2 x i16> *%i,align 1 @@ -900,7 +900,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; SKX-LABEL: sext_2x16mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %a = load <2 x i16>,<2 x i16> *%i,align 1 @@ -932,7 +932,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; SKX-LABEL: zext_4x16mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -954,7 +954,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; SKX-LABEL: sext_4x16mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -1067,7 +1067,7 @@ define <2 x i64> 
@zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; SKX-LABEL: zext_2x32mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero ; SKX-NEXT: retq %a = load <2 x i32>,<2 x i32> *%i,align 1 @@ -1089,7 +1089,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; SKX-LABEL: sext_2x32mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} ; SKX-NEXT: retq %a = load <2 x i32>,<2 x i32> *%i,align 1 @@ -1121,7 +1121,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; SKX-LABEL: zext_4x32mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; SKX-NEXT: retq %a = load <4 x i32>,<4 x i32> *%i,align 1 @@ -1143,7 +1143,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; SKX-LABEL: sext_4x32mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovd2m %xmm0, %k1 ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} ; SKX-NEXT: retq %a = load <4 x i32>,<4 x i32> *%i,align 1 @@ -1184,7 +1184,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind ; SKX-LABEL: zext_4x32_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; SKX-NEXT: retq %x = zext <4 x i32> %a to <4 x i64> @@ -1362,7 +1362,7 @@ define i16 @trunc_16i32_to_16i1(<16 x 
i32> %a) { ; SKX-LABEL: trunc_16i32_to_16i1: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 -; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0 +; SKX-NEXT: vpmovd2m %zmm0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 88e0b8cdd02..4a786325587 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -539,7 +539,7 @@ define void @test7(<8 x i1> %mask) { ; AVX512DQ: ## %bb.0: ## %allocas ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, %eax ; AVX512DQ-NEXT: orb $85, %al ; AVX512DQ-NEXT: vzeroupper @@ -688,7 +688,7 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) { ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: LBB18_3: ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper @@ -729,7 +729,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; SKX-NEXT: LBB20_1: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 ; SKX-NEXT: LBB20_3: -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: vpmovm2d %k0, %xmm0 ; SKX-NEXT: retq ; @@ -759,7 +759,7 @@ define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; AVX512DQ-NEXT: LBB20_1: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 ; AVX512DQ-NEXT: LBB20_3: -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 ; AVX512DQ-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512DQ-NEXT: vzeroupper @@ -1203,7 +1203,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; SKX-LABEL: 
test22: ; SKX: ## %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; @@ -1219,7 +1219,7 @@ define void @test22(<4 x i1> %a, <4 x i1>* %addr) { ; AVX512DQ-LABEL: test22: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1240,7 +1240,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) { ; SKX-LABEL: test23: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovq2m %xmm0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq ; @@ -1256,7 +1256,7 @@ define void @test23(<2 x i1> %a, <2 x i1>* %addr) { ; AVX512DQ-LABEL: test23: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -1317,7 +1317,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; SKX-LABEL: store_v2i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovq2m %xmm0, %k0 ; SKX-NEXT: knotw %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq @@ -1335,7 +1335,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; AVX512DQ-LABEL: store_v2i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1359,7 +1359,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; SKX-LABEL: store_v4i1: ; SKX: ## %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovd2m %xmm0, 
%k0 ; SKX-NEXT: knotw %k0, %k0 ; SKX-NEXT: kmovb %k0, (%rdi) ; SKX-NEXT: retq @@ -1377,7 +1377,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; AVX512DQ-LABEL: store_v4i1: ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1421,7 +1421,7 @@ define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: knotb %k0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -1463,7 +1463,7 @@ define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: knotw %k0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -2068,7 +2068,7 @@ define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2107,7 +2107,7 @@ define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwq %xmm0, %zmm0 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovq2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovb %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2145,7 +2145,7 @@ define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; 
AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq @@ -2188,11 +2188,11 @@ define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 ; AVX512DQ-NEXT: vpslld $31, %zmm1, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm1, %k0 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -2235,10 +2235,10 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxwd %ymm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovsxwd %ymm1, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) ; AVX512DQ-NEXT: kmovw %k0, (%rdi) ; AVX512DQ-NEXT: vzeroupper @@ -2292,16 +2292,16 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; AVX512DQ: ## %bb.0: ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 ; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k2 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k2 ; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm0 ; AVX512DQ-NEXT: vpslld $31, %zmm0, %zmm0 -; 
AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 ; AVX512DQ-NEXT: kmovw %k3, 6(%rdi) ; AVX512DQ-NEXT: kmovw %k2, 4(%rdi) ; AVX512DQ-NEXT: kmovw %k1, 2(%rdi) diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 8f826f07f24..b5067896d11 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -1789,7 +1789,7 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; GENERIC-LABEL: f64to4f32_mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [3:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -1797,7 +1797,7 @@ define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) { ; SKX-LABEL: f64to4f32_mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] ; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -3211,14 +3211,14 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_4x8mem_to_4x32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd 
%xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -3231,14 +3231,14 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: sext_4x8mem_to_4x32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -3397,14 +3397,14 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_2x8mem_to_2x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x8mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: 
[9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i8>,<2 x i8> *%i,align 1 @@ -3416,14 +3416,14 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; GENERIC-LABEL: sext_2x8mem_to_2x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i8>,<2 x i8> *%i,align 1 @@ -3450,14 +3450,14 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC-LABEL: zext_4x8mem_to_4x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = 
load <4 x i8>,<4 x i8> *%i,align 1 @@ -3470,14 +3470,14 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; GENERIC-LABEL: sext_4x8mem_to_4x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i8>,<4 x i8> *%i,align 1 @@ -3560,14 +3560,14 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x16mem_to_4x32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x32: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -3580,14 +3580,14 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x16mem_to_4x32mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd 
%xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -3791,14 +3791,14 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; GENERIC-LABEL: zext_2x16mem_to_2x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x16mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i16>,<2 x i16> *%i,align 1 @@ -3811,14 +3811,14 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; GENERIC-LABEL: sext_2x16mem_to_2x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # 
sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i16>,<2 x i16> *%i,align 1 @@ -3846,14 +3846,14 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x16mem_to_4x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -3866,14 +3866,14 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x16mem_to_4x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] ; SKX-NEXT: 
retq # sched: [7:1.00] %a = load <4 x i16>,<4 x i16> *%i,align 1 @@ -3989,14 +3989,14 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; GENERIC-LABEL: zext_2x32mem_to_2x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x32mem_to_2x64: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i32>,<2 x i32> *%i,align 1 @@ -4009,14 +4009,14 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; GENERIC-LABEL: sext_2x32mem_to_2x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <2 x i32>,<2 x i32> *%i,align 1 @@ -4044,14 +4044,14 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x32mem_to_4x64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # 
sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32mem_to_4x64: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i32>,<4 x i32> *%i,align 1 @@ -4064,14 +4064,14 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; GENERIC-LABEL: sext_4x32mem_to_4x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = load <4 x i32>,<4 x i32> *%i,align 1 @@ -4113,14 +4113,14 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind ; GENERIC-LABEL: zext_4x32_to_4x64mask: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32_to_4x64mask: ; SKX: # %bb.0: 
; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00] ; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %x = zext <4 x i32> %a to <4 x i64> @@ -4306,7 +4306,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; GENERIC-LABEL: trunc_16i32_to_16i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] @@ -4315,7 +4315,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; SKX-LABEL: trunc_16i32_to_16i1: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %zmm0, %zmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: vzeroupper # sched: [4:1.00] @@ -7138,7 +7138,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; GENERIC-NEXT: .LBB389_1: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: .LBB389_3: -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7152,7 +7152,7 @@ define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) { ; SKX-NEXT: .LBB389_1: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: .LBB389_3: -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: 
[1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp sgt i32 %a1, %b1 @@ -7361,14 +7361,14 @@ define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) { ; GENERIC-LABEL: vmov_test22: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vmov_test22: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] store <4 x i1> %a, <4 x i1>* %addr @@ -7379,14 +7379,14 @@ define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) { ; GENERIC-LABEL: vmov_test23: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: vmov_test23: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] store <2 x i1> %a, <2 x i1>* %addr @@ -7418,7 +7418,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; GENERIC-LABEL: store_v2i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7426,7 +7426,7 @@ define void 
@store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) { ; SKX-LABEL: store_v2i1: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -7439,7 +7439,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; GENERIC-LABEL: store_v4i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [1:1.00] +; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33] ; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:1.00] ; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -7447,7 +7447,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) { ; SKX-LABEL: store_v4i1: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 # sched: [3:1.00] +; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll index f6cb093d521..da9c8cac68a 100644 --- a/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll +++ b/llvm/test/CodeGen/X86/avx512-skx-insert-subvec.ll @@ -5,7 +5,7 @@ define <8 x i1> @test(<2 x i1> %a) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovq2m %xmm0, %k0 ; CHECK-NEXT: kshiftlb $2, %k0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq @@ -17,7 +17,7 @@ define <8 x i1> @test1(<2 x i1> %a) { ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovq2m 
%xmm0, %k0 ; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq @@ -29,7 +29,7 @@ define <8 x i1> @test2(<2 x i1> %a) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovq2m %xmm0, %k0 ; CHECK-NEXT: vpmovm2d %k0, %ymm0 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1] ; CHECK-NEXT: vpmovd2m %ymm0, %k0 @@ -44,7 +44,7 @@ define <8 x i1> @test3(<4 x i1> %a) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq @@ -56,9 +56,9 @@ define <8 x i1> @test4(<4 x i1> %a, <4 x i1>%b) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 -; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0 +; CHECK-NEXT: vpmovd2m %xmm1, %k0 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 +; CHECK-NEXT: vpmovd2m %xmm0, %k1 ; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: korb %k0, %k1, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 @@ -72,9 +72,9 @@ define <4 x i1> @test5(<2 x i1> %a, <2 x i1>%b) { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1 -; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0 +; CHECK-NEXT: vpmovq2m %xmm1, %k0 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 +; CHECK-NEXT: vpmovq2m %xmm0, %k1 ; CHECK-NEXT: kshiftlb $2, %k0, %k0 ; CHECK-NEXT: korb %k0, %k1, %k0 ; CHECK-NEXT: vpmovm2d %k0, %xmm0 @@ -88,9 +88,9 @@ define <16 x i1> @test6(<2 x i1> %a, <2 x i1>%b) { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: ; CHECK-NEXT: vpsllq $63, %xmm1, %xmm1 -; CHECK-NEXT: vptestmq %xmm1, %xmm1, %k0 +; CHECK-NEXT: vpmovq2m %xmm1, %k0 ; CHECK-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-NEXT: vptestmq %xmm0, %xmm0, %k1 +; CHECK-NEXT: vpmovq2m %xmm0, %k1 ; CHECK-NEXT: kshiftlb $2, %k0, %k0 ; CHECK-NEXT: korb %k0, %k1, 
%k0 ; CHECK-NEXT: vpmovm2b %k0, %xmm0 @@ -104,9 +104,9 @@ define <32 x i1> @test7(<4 x i1> %a, <4 x i1>%b) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 -; CHECK-NEXT: vptestmd %xmm1, %xmm1, %k0 +; CHECK-NEXT: vpmovd2m %xmm1, %k0 ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k1 +; CHECK-NEXT: vpmovd2m %xmm0, %k1 ; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: korb %k0, %k1, %k0 ; CHECK-NEXT: vpmovm2b %k0, %ymm0 @@ -147,7 +147,7 @@ define <2 x i1> @test10(<4 x i1> %a, <4 x i1> %b) { ; CHECK-LABEL: test10: ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kshiftrb $2, %k0, %k0 ; CHECK-NEXT: vpmovm2q %k0, %xmm0 ; CHECK-NEXT: retq @@ -159,7 +159,7 @@ define <8 x i1> @test11(<4 x i1> %a, <4 x i1>%b) { ; CHECK-LABEL: test11: ; CHECK: # %bb.0: ; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 -; CHECK-NEXT: vptestmd %xmm0, %xmm0, %k0 +; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kshiftlb $4, %k0, %k0 ; CHECK-NEXT: vpmovm2w %k0, %xmm0 ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll index 0edd01e8aef..d61e4e13df9 100644 --- a/llvm/test/CodeGen/X86/avx512-vselect.ll +++ b/llvm/test/CodeGen/X86/avx512-vselect.ll @@ -5,12 +5,19 @@ target triple = "x86_64-unknown-unknown" define <8 x i64> @test1(<8 x i64> %m, <8 x i64> %a, <8 x i64> %b) { -; CHECK-LABEL: test1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vpsllq $63, %zmm0, %zmm0 -; CHECK-NEXT: vptestmq %zmm0, %zmm0, %k1 -; CHECK-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} -; CHECK-NEXT: retq +; CHECK-SKX-LABEL: test1: +; CHECK-SKX: # %bb.0: # %entry +; CHECK-SKX-NEXT: vpsllq $63, %zmm0, %zmm0 +; CHECK-SKX-NEXT: vpmovq2m %zmm0, %k1 +; CHECK-SKX-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-SKX-NEXT: retq +; +; CHECK-KNL-LABEL: test1: +; CHECK-KNL: # %bb.0: # %entry +; CHECK-KNL-NEXT: vpsllq $63, %zmm0, 
%zmm0 +; CHECK-KNL-NEXT: vptestmq %zmm0, %zmm0, %k1 +; CHECK-KNL-NEXT: vpblendmq %zmm1, %zmm2, %zmm0 {%k1} +; CHECK-KNL-NEXT: retq entry: %m.trunc = trunc <8 x i64> %m to <8 x i1> %ret = select <8 x i1> %m.trunc, <8 x i64> %a, <8 x i64> %b diff --git a/llvm/test/CodeGen/X86/compress_expand.ll b/llvm/test/CodeGen/X86/compress_expand.ll index 782162b4b9e..209991506da 100644 --- a/llvm/test/CodeGen/X86/compress_expand.ll +++ b/llvm/test/CodeGen/X86/compress_expand.ll @@ -191,7 +191,7 @@ define void @test10(i64* %base, <4 x i64> %V, <4 x i1> %mask) { ; SKX-LABEL: test10: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vpcompressq %ymm0, (%rdi) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -213,7 +213,7 @@ define void @test11(i64* %base, <2 x i64> %V, <2 x i1> %mask) { ; SKX-LABEL: test11: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vpcompressq %xmm0, (%rdi) {%k1} ; SKX-NEXT: retq ; @@ -234,7 +234,7 @@ define void @test12(float* %base, <4 x float> %V, <4 x i1> %mask) { ; SKX-LABEL: test12: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vcompressps %xmm0, (%rdi) {%k1} ; SKX-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 9cda83e9e91..bc10c4aec50 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -841,7 +841,7 @@ define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { ; SKX-LABEL: test15: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1} ; SKX-NEXT: vmovaps %xmm1, %xmm0 ; SKX-NEXT: retq @@ -849,7 +849,7 @@ define 
<4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) { ; SKX_32-LABEL: test15: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovd2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm1 {%k1} ; SKX_32-NEXT: vmovaps %xmm1, %xmm0 @@ -891,7 +891,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x ; SKX-LABEL: test16: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1} ; SKX-NEXT: vmovapd %ymm2, %ymm0 ; SKX-NEXT: retq @@ -899,7 +899,7 @@ define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x ; SKX_32-LABEL: test16: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovd2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1} ; SKX_32-NEXT: vmovapd %ymm2, %ymm0 @@ -946,7 +946,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x ; SKX-NEXT: vpsllq $32, %xmm0, %xmm0 ; SKX-NEXT: vpsraq $32, %xmm0, %xmm0 ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vgatherqpd (%rdi,%xmm0,8), %xmm2 {%k1} ; SKX-NEXT: vmovapd %xmm2, %xmm0 ; SKX-NEXT: retq @@ -956,7 +956,7 @@ define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x ; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0 ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherqpd (%eax,%xmm0,8), %xmm2 {%k1} ; SKX_32-NEXT: vmovapd %xmm2, %xmm0 @@ -1002,7 +1002,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 
x i1>%mask) { ; SKX-LABEL: test18: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpmovd2m %xmm2, %k1 ; SKX-NEXT: vpscatterqd %xmm0, (,%ymm1) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1010,7 +1010,7 @@ define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) { ; SKX_32-LABEL: test18: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 +; SKX_32-NEXT: vpmovd2m %xmm2, %k1 ; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) @@ -1046,7 +1046,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind ; SKX-LABEL: test19: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1} ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq @@ -1054,7 +1054,7 @@ define void @test19(<4 x double>%a1, double* %ptr, <4 x i1>%mask, <4 x i64> %ind ; SKX_32-LABEL: test19: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovd2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1} ; SKX_32-NEXT: vzeroupper @@ -1093,7 +1093,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { ; SKX-LABEL: test20: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vpmovq2m %xmm2, %k1 ; SKX-NEXT: vscatterqps %xmm0, (,%xmm1) {%k1} ; SKX-NEXT: retq ; @@ -1101,7 +1101,7 @@ define void @test20(<2 x float>%a1, <2 x float*> %ptr, <2 x i1> %mask) { ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX_32-NEXT: vpmovq2m %xmm2, %k1 ; 
SKX_32-NEXT: vscatterdps %xmm0, (,%xmm1) {%k1} ; SKX_32-NEXT: retl call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask) @@ -1137,7 +1137,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { ; SKX-LABEL: test21: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vpmovq2m %xmm2, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpscatterqd %xmm0, (,%xmm1) {%k1} ; SKX-NEXT: retq @@ -1145,7 +1145,7 @@ define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) { ; SKX_32-LABEL: test21: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX_32-NEXT: vpmovq2m %xmm2, %k1 ; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] ; SKX_32-NEXT: vpscatterdd %xmm0, (,%xmm1) {%k1} @@ -1189,7 +1189,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX: # %bb.0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1} ; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq @@ -1198,7 +1198,7 @@ define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x fl ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherdps (%eax,%xmm0,4), %xmm2 {%k1} ; SKX_32-NEXT: vmovaps %xmm2, %xmm0 @@ -1240,7 +1240,7 @@ define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x f ; SKX-LABEL: test22a: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; 
SKX-NEXT: vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1} ; SKX-NEXT: vmovaps %xmm2, %xmm0 ; SKX-NEXT: retq @@ -1248,7 +1248,7 @@ define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x f ; SKX_32-LABEL: test22a: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vgatherqps (%eax,%xmm0,4), %xmm2 {%k1} ; SKX_32-NEXT: vmovaps %xmm2, %xmm0 @@ -1292,7 +1292,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> % ; SKX-LABEL: test23: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; SKX-NEXT: vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1} @@ -1302,7 +1302,7 @@ define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> % ; SKX_32-LABEL: test23: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] ; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] @@ -1346,7 +1346,7 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> ; SKX-LABEL: test23b: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; SKX-NEXT: vpgatherqd (%rdi,%xmm0,4), %xmm1 {%k1} ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero @@ -1355,7 +1355,7 @@ define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> ; SKX_32-LABEL: test23b: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; 
SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] ; SKX_32-NEXT: vpgatherqd (%eax,%xmm0,4), %xmm1 {%k1} @@ -1447,7 +1447,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> % ; SKX-NEXT: vpsllq $32, %xmm0, %xmm0 ; SKX-NEXT: vpsraq $32, %xmm0, %xmm0 ; SKX-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovq2m %xmm1, %k1 ; SKX-NEXT: vpgatherqq (%rdi,%xmm0,8), %xmm2 {%k1} ; SKX-NEXT: vmovdqa %xmm2, %xmm0 ; SKX-NEXT: retq @@ -1457,7 +1457,7 @@ define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> % ; SKX_32-NEXT: vpsllq $32, %xmm0, %xmm0 ; SKX_32-NEXT: vpsraq $32, %xmm0, %xmm0 ; SKX_32-NEXT: vpsllq $63, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmq %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovq2m %xmm1, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vpgatherqq (%eax,%xmm0,8), %xmm2 {%k1} ; SKX_32-NEXT: vmovdqa %xmm2, %xmm0 @@ -1739,7 +1739,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX-LABEL: test30: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX-NEXT: vptestmd %xmm2, %xmm2, %k1 +; SKX-NEXT: vpmovd2m %xmm2, %k1 ; SKX-NEXT: kmovw %k1, %eax ; SKX-NEXT: vpmovsxdq %xmm1, %ymm1 ; SKX-NEXT: vpsllq $2, %ymm1, %ymm1 @@ -1778,7 +1778,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x ; SKX_32-NEXT: subl $12, %esp ; SKX_32-NEXT: .cfi_def_cfa_offset 16 ; SKX_32-NEXT: vpslld $31, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmd %xmm2, %xmm2, %k1 +; SKX_32-NEXT: vpmovd2m %xmm2, %k1 ; SKX_32-NEXT: kmovw %k1, %eax ; SKX_32-NEXT: vpslld $2, %xmm1, %xmm1 ; SKX_32-NEXT: vpaddd %xmm1, %xmm0, %xmm2 @@ -1882,7 +1882,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: 
vextracti64x4 $1, %zmm3, %ymm2 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2} @@ -1894,7 +1894,7 @@ define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vpgatherdd (,%zmm0), %zmm2 {%k1} ; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0 ; SKX_32-NEXT: retl @@ -1940,7 +1940,7 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vpgatherqq (,%zmm0), %zmm3 {%k1} ; SKX-NEXT: vpgatherqq (,%zmm1), %zmm4 {%k2} @@ -1959,7 +1959,7 @@ define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i ; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1} @@ -1999,7 +1999,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2} @@ -2011,7 +2011,7 @@ define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vgatherdps (,%zmm0), 
%zmm2 {%k1} ; SKX_32-NEXT: vmovaps %zmm2, %zmm0 ; SKX_32-NEXT: retl @@ -2057,7 +2057,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vgatherqpd (,%zmm0), %zmm3 {%k1} ; SKX-NEXT: vgatherqpd (,%zmm1), %zmm4 {%k2} @@ -2076,7 +2076,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} @@ -2116,7 +2116,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1} ; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm0 @@ -2128,7 +2128,7 @@ define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> % ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vpscatterdd %zmm2, (,%zmm0) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl @@ -2173,7 +2173,7 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vpscatterqq %zmm3, (,%zmm0) {%k1} ; SKX-NEXT: vpscatterqq 
%zmm4, (,%zmm1) {%k2} @@ -2191,7 +2191,7 @@ define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> % ; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1} @@ -2231,7 +2231,7 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1} ; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm0 @@ -2243,7 +2243,7 @@ define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x floa ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vscatterdps %zmm2, (,%zmm0) {%k1} ; SKX_32-NEXT: vzeroupper ; SKX_32-NEXT: retl @@ -2289,7 +2289,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm2, %zmm2 ; SKX-NEXT: vpslld $31, %zmm2, %zmm2 -; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 +; SKX-NEXT: vpmovd2m %zmm2, %k1 ; SKX-NEXT: kshiftrw $8, %k1, %k2 ; SKX-NEXT: vscatterqpd %zmm3, (,%zmm0) {%k1} ; SKX-NEXT: vscatterqpd %zmm4, (,%zmm1) {%k2} @@ -2307,7 +2307,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; SKX_32-NEXT: subl $64, %esp ; SKX_32-NEXT: vpmovsxbd %xmm1, %zmm1 ; SKX_32-NEXT: vpslld $31, %zmm1, %zmm1 -; SKX_32-NEXT: vptestmd %zmm1, %zmm1, %k1 +; SKX_32-NEXT: vpmovd2m %zmm1, %k1 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) 
{%k1} @@ -2359,7 +2359,7 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6 ; SKX-LABEL: test_pr28312: ; SKX: # %bb.0: ; SKX-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX-NEXT: vpmovd2m %xmm1, %k1 ; SKX-NEXT: vpgatherqq (,%ymm0), %ymm1 {%k1} ; SKX-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2375,7 +2375,7 @@ define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i6 ; SKX_32-NEXT: andl $-32, %esp ; SKX_32-NEXT: subl $32, %esp ; SKX_32-NEXT: vpslld $31, %xmm1, %xmm1 -; SKX_32-NEXT: vptestmd %xmm1, %xmm1, %k1 +; SKX_32-NEXT: vpmovd2m %xmm1, %k1 ; SKX_32-NEXT: vpgatherdq (,%xmm0), %ymm1 {%k1} ; SKX_32-NEXT: vpaddq %ymm1, %ymm1, %ymm0 ; SKX_32-NEXT: vpaddq %ymm0, %ymm1, %ymm0 @@ -2546,7 +2546,7 @@ define <2 x float> @large_index(float* %base, <2 x i128> %ind, <2 x i1> %mask, < ; SKX-LABEL: large_index: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX-NEXT: vpmovq2m %xmm0, %k1 ; SKX-NEXT: vmovq %rcx, %xmm0 ; SKX-NEXT: vmovq %rsi, %xmm2 ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] @@ -2557,7 +2557,7 @@ define <2 x float> @large_index(float* %base, <2 x i128> %ind, <2 x i1> %mask, < ; SKX_32-LABEL: large_index: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0 -; SKX_32-NEXT: vptestmq %xmm0, %xmm0, %k1 +; SKX_32-NEXT: vpmovq2m %xmm0, %k1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax ; SKX_32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SKX_32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -2694,7 +2694,7 @@ define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> ; SKX-LABEL: test_scatter_2i32_index: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX-NEXT: vpmovq2m %xmm2, %k1 ; SKX-NEXT: vpsllq $32, %xmm1, %xmm1 ; SKX-NEXT: vpsraq $32, %xmm1, %xmm1 ; SKX-NEXT: vscatterqpd %xmm0, (%rdi,%xmm1,8) {%k1} @@ 
-2703,7 +2703,7 @@ define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> ; SKX_32-LABEL: test_scatter_2i32_index: ; SKX_32: # %bb.0: ; SKX_32-NEXT: vpsllq $63, %xmm2, %xmm2 -; SKX_32-NEXT: vptestmq %xmm2, %xmm2, %k1 +; SKX_32-NEXT: vpmovq2m %xmm2, %k1 ; SKX_32-NEXT: vpsllq $32, %xmm1, %xmm1 ; SKX_32-NEXT: vpsraq $32, %xmm1, %xmm1 ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll index f0036c82e56..6ca02a10729 100644 --- a/llvm/test/CodeGen/X86/pr33349.ll +++ b/llvm/test/CodeGen/X86/pr33349.ll @@ -45,7 +45,7 @@ target triple = "x86_64-unknown-linux-gnu" ; SKX-LABEL: test: ; SKX: # %bb.0: # %bb ; SKX-NEXT: vpslld $31, %xmm0, %xmm0 -; SKX-NEXT: vptestmd %xmm0, %xmm0, %k0 +; SKX-NEXT: vpmovd2m %xmm0, %k0 ; SKX-NEXT: kshiftrb $2, %k0, %k1 ; SKX-NEXT: kshiftrw $1, %k1, %k2 ; SKX-NEXT: kmovd %k2, %eax diff --git a/llvm/test/CodeGen/X86/required-vector-width.ll b/llvm/test/CodeGen/X86/required-vector-width.ll index e6b2f2f9585..257d3f0d079 100644 --- a/llvm/test/CodeGen/X86/required-vector-width.ll +++ b/llvm/test/CodeGen/X86/required-vector-width.ll @@ -614,7 +614,7 @@ define <16 x i16> @test_16f32toub_512(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 ; CHECK-NEXT: vpslld $31, %zmm1, %zmm1 -; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 +; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %a = load <16 x float>, <16 x float>* %ptr @@ -645,7 +645,7 @@ define <16 x i16> @test_16f32tosb_512(<16 x float>* %ptr, <16 x i16> %passthru) ; CHECK-LABEL: test_16f32tosb_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vcvttps2dq (%rdi), %zmm1 -; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1 +; CHECK-NEXT: vpmovd2m %zmm1, %k1 ; CHECK-NEXT: vmovdqu16 %ymm0, %ymm0 {%k1} {z} ; CHECK-NEXT: retq %a = load <16 x float>, <16 x float>* %ptr diff --git a/llvm/test/CodeGen/X86/vector-compare-results.ll 
b/llvm/test/CodeGen/X86/vector-compare-results.ll index f33daf2900e..52130a7dfa0 100644 --- a/llvm/test/CodeGen/X86/vector-compare-results.ll +++ b/llvm/test/CodeGen/X86/vector-compare-results.ll @@ -6411,28 +6411,28 @@ define <128 x i1> @test_cmp_v128i8(<128 x i8> %a0, <128 x i8> %a1) nounwind { ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4 -; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0 +; AVX512DQ-NEXT: vpmovd2m %zmm4, %k0 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k1 ; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k2 +; AVX512DQ-NEXT: vpmovd2m %zmm1, %k2 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k3 ; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k4 +; AVX512DQ-NEXT: vpmovd2m %zmm1, %k4 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k5 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k5 ; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1 -; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k6 +; AVX512DQ-NEXT: vpmovd2m %zmm1, %k6 ; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0 -; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k7 +; AVX512DQ-NEXT: vpmovd2m %zmm0, %k7 ; AVX512DQ-NEXT: kmovw %k7, 14(%rdi) ; AVX512DQ-NEXT: kmovw %k6, 12(%rdi) ; AVX512DQ-NEXT: kmovw %k5, 10(%rdi) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll index 2511c85e791..97462d98109 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ 
b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -567,7 +567,7 @@ define <16 x float> @test_vshuff32x4_512_mask(<16 x float> %x, <16 x float> %x1, ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512F-NEXT: vpmovd2m %zmm3, %k1 ; AVX512F-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] ; AVX512F-NEXT: vmovaps %zmm2, %zmm0 ; AVX512F-NEXT: retq @@ -589,7 +589,7 @@ define <16 x i32> @test_vshufi32x4_512_mask(<16 x i32> %x, <16 x i32> %x1, <16 x ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3 ; AVX512F-NEXT: vpslld $31, %zmm3, %zmm3 -; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1 +; AVX512F-NEXT: vpmovd2m %zmm3, %k1 ; AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,4,5,6,7],zmm1[4,5,6,7,0,1,2,3] ; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm0 ; AVX512F-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll index f2efa878c61..5283d5be395 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-v1.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-v1.ll @@ -30,7 +30,7 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) { ; VL_BW_DQ-LABEL: shuf2i1_1_0: ; VL_BW_DQ: # %bb.0: ; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0 +; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0 ; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0 @@ -71,7 +71,7 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) { ; VL_BW_DQ-LABEL: shuf2i1_1_2: ; VL_BW_DQ: # %bb.0: ; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0 -; VL_BW_DQ-NEXT: vptestmq %xmm0, %xmm0, %k0 +; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0 ; VL_BW_DQ-NEXT: movq $-1, %rax ; VL_BW_DQ-NEXT: vmovq %rax, %xmm0 ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm1 @@ -111,7 +111,7 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) { ; VL_BW_DQ-LABEL: shuf4i1_3_2_10: ; VL_BW_DQ: # %bb.0: ; 
VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0 -; VL_BW_DQ-NEXT: vptestmd %xmm0, %xmm0, %k0 +; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0 ; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0 ; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] ; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0 |

