Diffstat (limited to 'llvm')
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp   | 25
-rw-r--r--   llvm/test/CodeGen/X86/avx512-cvt.ll       | 48
-rwxr-xr-x   llvm/test/CodeGen/X86/avx512-schedule.ll  | 16
3 files changed, 44 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 22bdace3453..2c1faa157dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15543,7 +15543,6 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
 
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (SrcVT.isVector()) {
     if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
       return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
@@ -15551,9 +15550,15 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                      DAG.getUNDEF(SrcVT)));
     }
     if (SrcVT.getVectorElementType() == MVT::i1) {
-      if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
-        return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
-                           DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v2i64, Src));
+      if (SrcVT == MVT::v2i1) {
+        // For v2i1, we need to widen to v4i1 first.
+        assert(VT == MVT::v2f64 && "Unexpected type");
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Src,
+                          DAG.getUNDEF(MVT::v2i1));
+        return DAG.getNode(X86ISD::CVTSI2P, dl, Op.getValueType(),
+                           DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Src));
+      }
+
       MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
       return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
                          DAG.getNode(ISD::SIGN_EXTEND, dl, IntegerVT, Src));
@@ -15903,9 +15908,15 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
   SDLoc dl(Op);
 
   if (SrcVT.getVectorElementType() == MVT::i1) {
-    if (SrcVT == MVT::v2i1)
-      return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
-                         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
+    if (SrcVT == MVT::v2i1) {
+      // For v2i1, we need to widen to v4i1 first.
+      assert(Op.getValueType() == MVT::v2f64 && "Unexpected type");
+      N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, N0,
+                       DAG.getUNDEF(MVT::v2i1));
+      return DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v2f64,
+                         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0));
+    }
+
     MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
     return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
                        DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index e88ec9d7b15..18e9f306bc1 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -1691,8 +1691,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
 ; VLDQ:       # %bb.0:
 ; VLDQ-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; VLDQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k0
-; VLDQ-NEXT:    vpmovm2q %k0, %xmm0
-; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
+; VLDQ-NEXT:    vpmovm2d %k0, %xmm0
+; VLDQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; VLDQ-NEXT:    retq
 ;
 ; VLNODQ-LABEL: sbto2f64:
@@ -1700,12 +1700,8 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
 ; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vcmpltpd %xmm0, %xmm1, %k1
 ; VLNODQ-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-; VLNODQ-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
-; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
-; VLNODQ-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm1
-; VLNODQ-NEXT:    vmovq %xmm0, %rax
-; VLNODQ-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm0
-; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VLNODQ-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; VLNODQ-NEXT:    retq
   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
   %1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2002,30 +1998,22 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
 ; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
 ; NOVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
-; NOVL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; NOVL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; NOVL-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; NOVL-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; NOVL-NEXT:    vcvtudq2pd %ymm0, %zmm0
+; NOVL-NEXT:    # kill: def %xmm0 killed %xmm0 killed %zmm0
+; NOVL-NEXT:    vzeroupper
 ; NOVL-NEXT:    retq
 ;
-; VLDQ-LABEL: ubto2f64:
-; VLDQ:       # %bb.0:
-; VLDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VLDQ-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLDQ-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
-; VLDQ-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLDQ-NEXT:    vcvtqq2pd %xmm0, %xmm0
-; VLDQ-NEXT:    retq
-;
-; VLNODQ-LABEL: ubto2f64:
-; VLNODQ:       # %bb.0:
-; VLNODQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; VLNODQ-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; VLNODQ-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
-; VLNODQ-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
-; VLNODQ-NEXT:    vpextrq $1, %xmm0, %rax
-; VLNODQ-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm1
-; VLNODQ-NEXT:    vmovq %xmm0, %rax
-; VLNODQ-NEXT:    vcvtsi2sdl %eax, %xmm2, %xmm0
-; VLNODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; VLNODQ-NEXT:    retq
+; VL-LABEL: ubto2f64:
+; VL:       # %bb.0:
+; VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; VL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
+; VL-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
+; VL-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
+; VL-NEXT:    vcvtudq2pd %xmm0, %xmm0
+; VL-NEXT:    retq
   %mask = icmp ult <2 x i32> %a, zeroinitializer
   %1 = uitofp <2 x i1> %mask to <2 x double>
   ret <2 x double> %1
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 78111874b58..306b95f0f3a 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -2602,16 +2602,16 @@ define <2 x double> @sbto2f64(<2 x double> %a) {
 ; GENERIC:       # %bb.0:
 ; GENERIC-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
 ; GENERIC-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.33]
+; GENERIC-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: sbto2f64:
 ; SKX:       # %bb.0:
 ; SKX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vcmpltpd %xmm0, %xmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT:    vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT:    vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
   %1 = sitofp <2 x i1> %cmpres to <2 x double>
@@ -2989,8 +2989,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
 ; GENERIC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; GENERIC-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
 ; GENERIC-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [4:0.50]
-; GENERIC-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; GENERIC-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [5:1.00]
+; GENERIC-NEXT:    vcvtudq2pd %xmm0, %xmm0 # sched: [4:1.00]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: ubto2f64:
@@ -2998,8 +2998,8 @@ define <2 x double> @ubto2f64(<2 x i32> %a) {
 ; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
 ; SKX-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
 ; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT:    vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
-; SKX-NEXT:    vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} # sched: [7:0.50]
+; SKX-NEXT:    vcvtudq2pd %xmm0, %xmm0 # sched: [5:1.00]
 ; SKX-NEXT:    retq # sched: [7:1.00]
   %mask = icmp ult <2 x i32> %a, zeroinitializer
   %1 = uitofp <2 x i1> %mask to <2 x double>
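
For context, the IR pattern this lowering change targets is the one already exercised by the sbto2f64 and ubto2f64 tests above: a <2 x i1> compare result converted to <2 x double> via sitofp or uitofp. A minimal standalone reproducer is sketched below; the RUN lines of the actual test files are not part of this diff, so the llc invocation and the function names here are only an assumed example for an AVX-512VL-enabled target.

; Illustrative reproducer -- run with something like:
;   llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl < repro.ll
; With this patch the <2 x i1> source is widened to v4i1, extended to v4i32,
; and converted with vcvtdq2pd / vcvtudq2pd instead of 64-bit-element
; conversions (vcvtqq2pd) or a scalarized vcvtsi2sd sequence.

define <2 x double> @signed_repro(<2 x double> %a) {
  %cmp = fcmp ogt <2 x double> %a, zeroinitializer
  %r = sitofp <2 x i1> %cmp to <2 x double>
  ret <2 x double> %r
}

define <2 x double> @unsigned_repro(<2 x i32> %a) {
  %mask = icmp ult <2 x i32> %a, zeroinitializer
  %r = uitofp <2 x i1> %mask to <2 x double>
  ret <2 x double> %r
}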

