diff options
author | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-01-11 12:59:32 +0000 |
---|---|---|
committer | Elena Demikhovsky <elena.demikhovsky@intel.com> | 2017-01-11 12:59:32 +0000 |
commit | 9d0e7c33d3fd35393d3f4de23fad6b33e7699c74 (patch) | |
tree | 3e5c43fc44458e64edd68e5fc6dab478ed644b0d /llvm/lib | |
parent | ffdd0728584319ad800803a3f3328838d832bdfa (diff) | |
download | bcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.tar.gz bcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.zip |
X86 CodeGen: Optimized pattern for truncate with unsigned saturation.
DAG patterns optimization: truncate + unsigned saturation supported by VPMOVUS* instructions in AVX-512.
And VPACKUS* instructions on SEE* targets.
Differential Revision: https://reviews.llvm.org/D28216
llvm-svn: 291670
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 31978ffe6ea..8c8810a4c3d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31220,6 +31220,93 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } +/// Check if truncation with saturation form type \p SrcVT to \p DstVT +/// is valid for the given \p Subtarget. +static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, + const X86Subtarget &Subtarget) { + if (!Subtarget.hasAVX512()) + return false; + + // FIXME: Scalar type may be supported if we move it to vector register. + if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512) + return false; + + EVT SrcElVT = SrcVT.getScalarType(); + EVT DstElVT = DstVT.getScalarType(); + if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64) + return false; + if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32) + return false; + if (SrcVT.is512BitVector() || Subtarget.hasVLX()) + return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); + return false; +} + +/// Return true if VPACK* instruction can be used for the given types +/// and it is avalable on \p Subtarget. +static bool +isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) { + if (Subtarget.hasSSE2()) + // v16i16 -> v16i8 + if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8) + return true; + if (Subtarget.hasSSE41()) + // v8i32 -> v8i16 + if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16) + return true; + return false; +} + +/// Detect a pattern of truncation with saturation: +/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched. +static SDValue detectUSatPattern(SDValue In, EVT VT) { + if (In.getOpcode() != ISD::UMIN) + return SDValue(); + + //Saturation with truncation. We truncate from InVT to VT. + assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() && + "Unexpected types for truncate operation"); + + APInt C; + if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) { + // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according + // the element size of the destination type. + return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) : + SDValue(); + } + return SDValue(); +} + +/// Detect a pattern of truncation with saturation: +/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). +/// The types should allow to use VPMOVUS* instruction on AVX512. +/// Return the source value to be truncated or SDValue() if the pattern was not +/// matched. +static SDValue detectAVX512USatPattern(SDValue In, EVT VT, + const X86Subtarget &Subtarget) { + if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) + return SDValue(); + return detectUSatPattern(In, VT); +} + +static SDValue +combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + SDValue USatVal = detectUSatPattern(In, VT); + if (USatVal) { + if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget)) + return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); + if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL); + return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi); + } + } + return SDValue(); +} + /// This function detects the AVG pattern between vectors of unsigned i8/i16, /// which is c = (a + b + 1) / 2, and replace this operation with the efficient /// X86ISD::AVG instruction. @@ -31786,6 +31873,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG, St->getPointerInfo(), St->getAlignment(), St->getMemOperand()->getFlags()); + if (SDValue Val = + detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget)) + return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(), + dl, Val, St->getBasePtr(), + St->getMemoryVT(), St->getMemOperand(), DAG); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); @@ -32406,6 +32499,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) return Avg; + // Try to combine truncation with unsigned saturation. + if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget)) + return Val; + // The bitcast source is a direct mmx result. // Detect bitcasts between i32 to x86mmx if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { |