summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp4
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td5
-rw-r--r--llvm/test/CodeGen/X86/avx512-cvt.ll4
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll8
5 files changed, 10 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 69438113b74..793a935aa08 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
while (true) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 254d0003e59..bdc312260bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c4e89bdac5a..c1c3f3a3fb0 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7890,11 +7890,6 @@ defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
-def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index e20a177f722..3a21708b0b2 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
@@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: retq
%res = fptoui <16 x float> %f to <16 x i16>
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 10d62d3e125..51a1c33b816 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; GENERIC-LABEL: f32to16uc:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdb %zmm0, %xmm0
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16uc:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; GENERIC-LABEL: f32to16us:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdw %zmm0, %ymm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16us:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = fptoui <16 x float> %f to <16 x i16>
OpenPOWER on IntegriCloud