summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2017-11-28 23:56:02 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-11-28 23:56:02 +0000
commit: 88ffb5d4d5c675afa013877f20922b928836b74f (patch)
tree: 40d13c5c45cd5c22f7831dff1c299b8409118fd3
parent: 3f749c2d4bb80b51d341fd6282a8e39e0e24a247 (diff)
download: bcm5719-llvm-88ffb5d4d5c675afa013877f20922b928836b74f.tar.gz
bcm5719-llvm-88ffb5d4d5c675afa013877f20922b928836b74f.zip
[X86] Mark ISD::FP_TO_UINT v16i8/v16i16 as Promote under AVX512 instead of Legal. Fix infinite loop in op legalization when promotion requires 2 steps.
Previously we had an isel pattern to add the truncate. Instead, use Promote to add the truncate to the DAG before isel. The Promote legalization code had to be updated to prevent an infinite loop when promotion takes multiple steps: each loop iteration widened the original type again instead of the previously tried type, so it never advanced past the first widened type. (llvm-svn: 319259)
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 4
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 4
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td | 5
-rw-r--r-- llvm/test/CodeGen/X86/avx512-cvt.ll | 4
-rwxr-xr-x llvm/test/CodeGen/X86/avx512-schedule.ll | 8
5 files changed, 10 insertions, 15 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 69438113b74..793a935aa08 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -497,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
while (true) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 254d0003e59..bdc312260bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1174,8 +1174,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v16i16, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c4e89bdac5a..c1c3f3a3fb0 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7890,11 +7890,6 @@ defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
truncstore_us_vi8, masked_truncstore_us_vi8>;
-def : Pat<(v16i16 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDWZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-def : Pat<(v16i8 (fp_to_uint (v16f32 VR512:$src1))),
- (VPMOVDBZrr (v16i32 (VCVTTPS2UDQZrr VR512:$src1)))>, Requires<[HasAVX512]>;
-
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index e20a177f722..3a21708b0b2 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -442,7 +442,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; ALL-LABEL: f32to16uc:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdb %zmm0, %xmm0
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
@@ -453,7 +453,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; ALL-LABEL: f32to16us:
; ALL: # BB#0:
-; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
+; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: vpmovdw %zmm0, %ymm0
; ALL-NEXT: retq
%res = fptoui <16 x float> %f to <16 x i16>
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
index 10d62d3e125..51a1c33b816 100755
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx512-schedule.ll
@@ -1578,14 +1578,14 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
define <16 x i8> @f32to16uc(<16 x float> %f) {
; GENERIC-LABEL: f32to16uc:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdb %zmm0, %xmm0
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16uc:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vzeroupper # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
@@ -1596,13 +1596,13 @@ define <16 x i8> @f32to16uc(<16 x float> %f) {
define <16 x i16> @f32to16us(<16 x float> %f) {
; GENERIC-LABEL: f32to16us:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0
+; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0
; GENERIC-NEXT: vpmovdw %zmm0, %ymm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: f32to16us:
; SKX: # BB#0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33]
+; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33]
; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
%res = fptoui <16 x float> %f to <16 x i16>
OpenPOWER on IntegriCloud