summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2017-01-11 12:59:32 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2017-01-11 12:59:32 +0000
commit9d0e7c33d3fd35393d3f4de23fad6b33e7699c74 (patch)
tree3e5c43fc44458e64edd68e5fc6dab478ed644b0d /llvm/lib
parentffdd0728584319ad800803a3f3328838d832bdfa (diff)
downloadbcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.tar.gz
bcm5719-llvm-9d0e7c33d3fd35393d3f4de23fad6b33e7699c74.zip
X86 CodeGen: Optimized pattern for truncate with unsigned saturation.
DAG patterns optimization: truncate + unsigned saturation supported by VPMOVUS* instructions in AVX-512. And VPACKUS* instructions on SEE* targets. Differential Revision: https://reviews.llvm.org/D28216 llvm-svn: 291670
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp97
1 files changed, 97 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 31978ffe6ea..8c8810a4c3d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31220,6 +31220,93 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
+/// Check if truncation with saturation form type \p SrcVT to \p DstVT
+/// is valid for the given \p Subtarget.
+static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return false;
+
+ // FIXME: Scalar type may be supported if we move it to vector register.
+ if (!SrcVT.isVector() || !SrcVT.isSimple() || SrcVT.getSizeInBits() > 512)
+ return false;
+
+ EVT SrcElVT = SrcVT.getScalarType();
+ EVT DstElVT = DstVT.getScalarType();
+ if (SrcElVT.getSizeInBits() < 16 || SrcElVT.getSizeInBits() > 64)
+ return false;
+ if (DstElVT.getSizeInBits() < 8 || DstElVT.getSizeInBits() > 32)
+ return false;
+ if (SrcVT.is512BitVector() || Subtarget.hasVLX())
+ return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
+ return false;
+}
+
+/// Return true if VPACK* instruction can be used for the given types
+/// and it is avalable on \p Subtarget.
+static bool
+isSATValidOnSSESubtarget(EVT SrcVT, EVT DstVT, const X86Subtarget &Subtarget) {
+ if (Subtarget.hasSSE2())
+ // v16i16 -> v16i8
+ if (SrcVT == MVT::v16i16 && DstVT == MVT::v16i8)
+ return true;
+ if (Subtarget.hasSSE41())
+ // v8i32 -> v8i16
+ if (SrcVT == MVT::v8i32 && DstVT == MVT::v8i16)
+ return true;
+ return false;
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectUSatPattern(SDValue In, EVT VT) {
+ if (In.getOpcode() != ISD::UMIN)
+ return SDValue();
+
+ //Saturation with truncation. We truncate from InVT to VT.
+ assert(In.getScalarValueSizeInBits() > VT.getScalarSizeInBits() &&
+ "Unexpected types for truncate operation");
+
+ APInt C;
+ if (ISD::isConstantSplatVector(In.getOperand(1).getNode(), C)) {
+ // C should be equal to UINT32_MAX / UINT16_MAX / UINT8_MAX according
+ // the element size of the destination type.
+ return APIntOps::isMask(VT.getScalarSizeInBits(), C) ? In.getOperand(0) :
+ SDValue();
+ }
+ return SDValue();
+}
+
+/// Detect a pattern of truncation with saturation:
+/// (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
+/// The types should allow to use VPMOVUS* instruction on AVX512.
+/// Return the source value to be truncated or SDValue() if the pattern was not
+/// matched.
+static SDValue detectAVX512USatPattern(SDValue In, EVT VT,
+ const X86Subtarget &Subtarget) {
+ if (!isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return SDValue();
+ return detectUSatPattern(In, VT);
+}
+
+static SDValue
+combineTruncateWithUSat(SDValue In, EVT VT, SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue USatVal = detectUSatPattern(In, VT);
+ if (USatVal) {
+ if (isSATValidOnAVX512Subtarget(In.getValueType(), VT, Subtarget))
+ return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
+ if (isSATValidOnSSESubtarget(In.getValueType(), VT, Subtarget)) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(USatVal, DL);
+ return DAG.getNode(X86ISD::PACKUS, DL, VT, Lo, Hi);
+ }
+ }
+ return SDValue();
+}
+
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replace this operation with the efficient
/// X86ISD::AVG instruction.
@@ -31786,6 +31873,12 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
St->getPointerInfo(), St->getAlignment(),
St->getMemOperand()->getFlags());
+ if (SDValue Val =
+ detectAVX512USatPattern(St->getValue(), St->getMemoryVT(), Subtarget))
+ return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
+ dl, Val, St->getBasePtr(),
+ St->getMemoryVT(), St->getMemOperand(), DAG);
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
@@ -32406,6 +32499,10 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
return Avg;
+ // Try to combine truncation with unsigned saturation.
+ if (SDValue Val = combineTruncateWithUSat(Src, VT, DL, DAG, Subtarget))
+ return Val;
+
// The bitcast source is a direct mmx result.
// Detect bitcasts between i32 to x86mmx
if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
OpenPOWER on IntegriCloud