summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  21
-rw-r--r--  llvm/test/CodeGen/X86/scalar-fp-to-i64.ll  24
2 files changed, 22 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c6f834cc46c..2e7d3d8df73 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28690,12 +28690,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
if (Subtarget.hasDQI() && VT == MVT::i64 &&
(SrcVT == MVT::f32 || SrcVT == MVT::f64)) {
assert(!Subtarget.is64Bit() && "i64 should be legal");
- unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
- // Using a 256-bit input here to guarantee 128-bit input for f32 case.
- // TODO: Use 128-bit vectors for f64 case?
- // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI.
+ unsigned NumElts = Subtarget.hasVLX() ? 2 : 8;
+ // If we use a 128-bit result we might need to use a target specific node.
+ unsigned SrcElts =
+ std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits());
MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts);
- MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts);
+ MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts);
+ unsigned Opc = N->getOpcode();
+ if (NumElts != SrcElts) {
+ if (IsStrict)
+ Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
+ else
+ Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
+ }
SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT,
@@ -28704,10 +28711,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Chain;
if (IsStrict) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
- Res = DAG.getNode(N->getOpcode(), SDLoc(N), Tys, N->getOperand(0), Res);
+ Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res);
Chain = Res.getValue(1);
} else
- Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res);
+ Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res);
Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx);
Results.push_back(Res);
if (IsStrict)
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index 6164ebe672a..30e4996057a 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -42,19 +42,17 @@ define i64 @f_to_u64(float %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: f_to_u64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: f_to_u64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_u64:
@@ -337,19 +335,17 @@ define i64 @f_to_s64(float %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: f_to_s64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: f_to_s64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: f_to_s64:
@@ -524,19 +520,17 @@ define i64 @d_to_u64(double %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: d_to_u64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: d_to_u64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_u64:
@@ -819,19 +813,17 @@ define i64 @d_to_s64(double %a) nounwind {
; AVX512DQVL_32_WIN-LABEL: d_to_s64:
; AVX512DQVL_32_WIN: # %bb.0:
; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_WIN-NEXT: vzeroupper
; AVX512DQVL_32_WIN-NEXT: retl
;
; AVX512DQVL_32_LIN-LABEL: d_to_s64:
; AVX512DQVL_32_LIN: # %bb.0:
; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0
+; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %xmm0, %xmm0
; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax
; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx
-; AVX512DQVL_32_LIN-NEXT: vzeroupper
; AVX512DQVL_32_LIN-NEXT: retl
;
; AVX512_64-LABEL: d_to_s64:
OpenPOWER on IntegriCloud