summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-11-09 07:48:51 +0000
committerCraig Topper <craig.topper@gmail.com>2016-11-09 07:48:51 +0000
commitf334ac19adfbfcbd9f12185d5b64bdf35b75f7f9 (patch)
tree54ae291b6c65932cc4aec79d94dd8b5c0c549f48 /llvm/lib
parent731bf9c5d61752bf63bf2d0eb11d7d63e9543a93 (diff)
downloadbcm5719-llvm-f334ac19adfbfcbd9f12185d5b64bdf35b75f7f9.tar.gz
bcm5719-llvm-f334ac19adfbfcbd9f12185d5b64bdf35b75f7f9.zip
[AVX-512] Add lowering to cvttpd2udq/cvttps2udq for fptoui v2f64/2f32 to 2i32
This patch adds support for fptoui to 2i32 from both 2f64 and 2f32, building on Simon's change for the signed version in r284459 and using AVX-512 instructions. If we don't have VLX support we need to use a 512-bit operation for v2f64->v2i32 and extract the result. It also recognises that cvttpd2udq zeroes the upper 64-bits of the xmm result. Differential Revision: https://reviews.llvm.org/D26331 llvm-svn: 286345
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td12
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h2
5 files changed, 26 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7405e9ddb78..fceaecf0bc3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1193,6 +1193,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
@@ -22358,12 +22359,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
- if (IsSigned && N->getValueType(0) == MVT::v2i32) {
+ if (N->getValueType(0) == MVT::v2i32) {
+ assert((IsSigned || Subtarget.hasAVX512()) &&
+ "Can only handle signed conversion without AVX512");
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue Src = N->getOperand(0);
if (Src.getValueType() == MVT::v2f64) {
SDValue Idx = DAG.getIntPtrConstant(0, dl);
- SDValue Res = DAG.getNode(X86ISD::CVTTPD2DQ, dl, MVT::v4i32, Src);
+ SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTPD2DQ
+ : X86ISD::CVTTPD2UDQ,
+ dl, MVT::v4i32, Src);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
@@ -22372,7 +22377,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue Idx = DAG.getIntPtrConstant(0, dl);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
DAG.getUNDEF(MVT::v2f32));
- Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, Res);
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
+ : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
Results.push_back(Res);
return;
@@ -22700,6 +22706,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVTTPD2DQ: return "X86ISD::CVTTPD2DQ";
+ case X86ISD::CVTTPD2UDQ: return "X86ISD::CVTTPD2UDQ";
case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 62b77ab6340..dabef9d2160 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -302,8 +302,8 @@ namespace llvm {
// Vector FP round.
VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
- // Vector double to signed integer (truncated).
- CVTTPD2DQ,
+ // Vector double to signed/unsigned integer (truncated).
+ CVTTPD2DQ, CVTTPD2UDQ,
// Vector signed/unsigned integer to double.
CVTDQ2PD, CVTUDQ2PD,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 408393183cb..671bfaadc3b 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6314,8 +6314,8 @@ defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, fp_to_uint,
- X86cvttp2uiRnd>, PS, VEX_W,
+defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
+ X86cvttpd2udq, X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
@@ -6395,6 +6395,11 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src1, sub_ymm)))), sub_xmm)>;
+def : Pat<(v4i32 (X86cvttpd2udq (v2f64 VR128X:$src))),
+ (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src, sub_xmm)))), sub_xmm)>;
+
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
@@ -6416,6 +6421,9 @@ let Predicates = [HasAVX512, HasVLX] in {
def : Pat<(X86vzmovl (v2i64 (bitconvert
(v4i32 (X86cvttpd2dq (v2f64 VR128X:$src)))))),
(VCVTTPD2DQZ128rr VR128:$src)>;
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttpd2udq (v2f64 VR128X:$src)))))))),
+ (VCVTTPD2UDQZ128rr VR128:$src)>;
}
let Predicates = [HasAVX512] in {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 377d8b61cde..0b834fa99ce 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -70,6 +70,9 @@ def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
def X86cvttpd2dq: SDNode<"X86ISD::CVTTPD2DQ",
SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
SDTCisVT<1, v2f64>]>>;
+def X86cvttpd2udq: SDNode<"X86ISD::CVTTPD2UDQ",
+ SDTypeProfile<1, 1, [SDTCisVT<0, v4i32>,
+ SDTCisVT<1, v2f64>]>>;
def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
SDTCisVT<1, v4i32>]>>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index f83878b10c8..af269ac87cb 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -586,7 +586,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTPD2UDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
OpenPOWER on IntegriCloud