diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 14 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx-schedule.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 13 |
6 files changed, 30 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 46dfdb9e319..8f3afb91867 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -668,7 +668,7 @@ def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), (MMX_MOVFR642Qrr FR64:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q - (bc_v2i64 (v4i32 (int_x86_sse2_cvtps2dq VR128:$src))))), + (bc_v2i64 (v4i32 (X86cvtp2Int (v4f32 VR128:$src)))))), (MMX_CVTPS2PIirr VR128:$src)>; def : Pat<(x86mmx (MMX_X86movdq2q (bc_v2i64 (v4i32 (fp_to_sint (v4f32 VR128:$src)))))), diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index d514888e073..a7c7a4f2947 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1782,34 +1782,36 @@ def : Pat<(v4f32 (X86Movss (CVTSI2SSrr_Int VR128:$dst, GR32:$src)>; } // Predicates = [UseSSE1] +let Predicates = [HasAVX, NoVLX] in { // Convert packed single/double fp to doubleword def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], + [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))], IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))], + (v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>, VEX_WIG; def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvt_ps2dq_256 VR256:$src))], + (v8i32 (X86cvtp2Int (v8f32 VR256:$src))))], IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>, VEX_WIG; def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR256:$dst, - (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))], + (v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>, VEX_WIG; +} def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], + [(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))], IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], + (v4i32 (X86cvtp2Int (memopv4f32 addr:$src))))], IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 5181d34aeba..3e3a863f848 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -374,6 +374,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx_cmp_ps_256, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0), X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), + X86_INTRINSIC_DATA(avx_cvt_ps2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), @@ -1602,6 +1603,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0), X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0), + X86_INTRINSIC_DATA(sse2_cvtps2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index 41bf89e890e..6c18efd2edf 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -178,10 +178,15 @@ declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) { -; CHECK-LABEL: test_x86_avx_cvt_ps2dq_256: -; CHECK: # %bb.0: -; CHECK-NEXT: vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0] -; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; AVX-LABEL: test_x86_avx_cvt_ps2dq_256: +; AVX: # %bb.0: +; AVX-NEXT: vcvtps2dq %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x5b,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512VL-LABEL: test_x86_avx_cvt_ps2dq_256: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvtps2dq %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5b,0xc0] +; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res } diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index ad06954dbef..9aaca64baee 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1408,7 +1408,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33] ; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] -; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index e3c02b625fb..f14356fe76b 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -369,10 +369,15 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { ; SSE-NEXT: cvtps2dq %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x5b,0xc0] ; SSE-NEXT: retl ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse2_cvtps2dq: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX2-LABEL: test_x86_sse2_cvtps2dq: +; AVX2: ## %bb.0: +; AVX2-NEXT: vcvtps2dq %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x5b,0xc0] +; AVX2-NEXT: retl ## encoding: [0xc3] +; +; SKX-LABEL: test_x86_sse2_cvtps2dq: +; SKX: ## %bb.0: +; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0] +; SKX-NEXT: retl ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } |