diff options
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-cvt.ll | 4 | ||||
-rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 8 |
5 files changed, 10 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 69438113b74..793a935aa08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { "Can't promote a vector with multiple results!"); EVT VT = Op.getValueType(); - EVT NewVT; + EVT NewVT = VT; unsigned NewOpc; while (true) { - NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext()); assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { NewOpc = ISD::FP_TO_SINT; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 254d0003e59..bdc312260bc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c4e89bdac5a..c1c3f3a3fb0 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7890,11 +7890,6 @@ defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, truncstore_us_vi8, masked_truncstore_us_vi8>; -def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))), - (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>; -def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))), - (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>; - let Predicates = [HasAVX512, NoVLX] in { def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), (v8i16 (EXTRACT_SUBREG diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index e20a177f722..3a21708b0b2 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { define <16 x i8> @f32to16uc(<16 x float> %f) { ; ALL-LABEL: f32to16uc: ; ALL: # BB#0: -; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 +; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 ; ALL-NEXT: vpmovdb %zmm0, %xmm0 ; ALL-NEXT: vzeroupper ; ALL-NEXT: retq @@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { define <16 x i16> @f32to16us(<16 x float> %f) { ; ALL-LABEL: f32to16us: ; ALL: # BB#0: -; ALL-NEXT: vcvttps2udq %zmm0, %zmm0 +; ALL-NEXT: vcvttps2dq %zmm0, %zmm0 ; ALL-NEXT: vpmovdw %zmm0, %ymm0 ; ALL-NEXT: retq %res = fptoui <16 x float> %f to <16 x i16> diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 10d62d3e125..51a1c33b816 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { define <16 x i8> @f32to16uc(<16 x float> %f) { ; GENERIC-LABEL: f32to16uc: ; GENERIC: # BB#0: -; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 ; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to16uc: ; SKX: # BB#0: -; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { define <16 x i16> @f32to16us(<16 x float> %f) { ; GENERIC-LABEL: f32to16us: ; GENERIC: # BB#0: -; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 +; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 ; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to16us: ; SKX: # BB#0: -; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i16> |