summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-16 14:48:32 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-11-16 14:48:32 +0000
commitb57dd17142a63c26157f61f1f8ae43f86c7be8c4 (patch)
tree5ead492053fbdb15a610637f42a1db16a7961986 /llvm/lib
parent678dd8fc62c5f57cca2b304376538a9f2fe3fb30 (diff)
downloadbcm5719-llvm-b57dd17142a63c26157f61f1f8ae43f86c7be8c4.tar.gz
bcm5719-llvm-b57dd17142a63c26157f61f1f8ae43f86c7be8c4.zip
[X86][AVX512] Autoupgrade lossless i32/u32 to f64 conversion intrinsics with generic IR
Both the (V)CVTDQ2PD (i32 to f64) and (V)CVTUDQ2PD (u32 to f64) conversion instructions are lossless and can be safely represented as generic SINT_TO_FP/UINT_TO_FP calls instead of x86 intrinsics without affecting final codegen. LLVM counterpart to D26686 Differential Revision: https://reviews.llvm.org/D26736 llvm-svn: 287108
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp17
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp21
-rw-r--r--llvm/lib/Target/X86/X86IntrinsicsInfo.h12
3 files changed, 29 insertions, 21 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 3d079ee35f5..18f2c17c7c9 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -295,6 +295,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("avx512.mask.padd.") || // Added in 4.0
Name.startswith("avx512.mask.psub.") || // Added in 4.0
Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
Name == "avx512.mask.add.pd.128" || // Added in 4.0
Name == "avx512.mask.add.pd.256" || // Added in 4.0
Name == "avx512.mask.add.ps.128" || // Added in 4.0
@@ -821,7 +823,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
Name == "sse2.cvtps2pd" ||
Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256")) {
+ Name == "avx.cvt.ps2.pd.256" ||
+ Name.startswith("avx512.mask.cvtdq2pd.") ||
+ Name.startswith("avx512.mask.cvtudq2pd."))) {
// Lossless i32/float to double conversion.
// Extract the bottom elements if necessary and convert to double vector.
Value *Src = CI->getArgOperand(0);
@@ -837,11 +841,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
ShuffleMask);
}
- bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
- if (Int2Double)
+ bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
+ bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
+ if (SInt2Double)
Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else if (UInt2Double)
+ Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
else
Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index af88324a4c4..9397f8c9cf5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1269,6 +1269,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@@ -14028,26 +14029,34 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
- MVT SVT = N0.getSimpleValueType();
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
- if (SVT.getVectorElementType() == MVT::i1) {
- if (SVT == MVT::v2i1)
+ if (SrcVT.getVectorElementType() == MVT::i1) {
+ if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
- MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
- switch (SVT.SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
+ case MVT::v2i32: {
+ if (VT == MVT::v2f64)
+ return DAG.getNode(X86ISD::CVTUDQ2PD, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, N0,
+ DAG.getUNDEF(SrcVT)));
+ return SDValue();
+ }
case MVT::v4i8:
case MVT::v4i16:
case MVT::v8i8:
case MVT::v8i16: {
- MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 7f7fe323a8e..a6b1d29e8f9 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -488,12 +488,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0), // no rm
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
@@ -624,12 +618,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTUDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0), // no rm
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,
OpenPOWER on IntegriCloud