summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp108
1 files changed, 107 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2298eb096da..365efa8825e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -28968,12 +28968,118 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
EltNo);
}
+// Try to match patterns such as
+// (i16 bitcast (v16i1 x))
+// ->
+// (i16 movmsk (16i8 sext (v16i1 x)))
+// before the illegal vector is scalarized on subtargets that don't have legal
+// vxi1 types.
+static SDValue combineBitcastvxi1(SelectionDAG &DAG, SDValue BitCast,
+ const X86Subtarget &Subtarget) {
+ EVT VT = BitCast.getValueType();
+ SDValue N0 = BitCast.getOperand(0);
+ EVT VecVT = N0->getValueType(0);
+
+ if (!VT.isScalarInteger() || !VecVT.isSimple())
+ return SDValue();
+
+ // With AVX512 vxi1 types are legal and we prefer using k-regs.
+ // MOVMSK is supported in SSE2 or later.
+ if (Subtarget.hasAVX512() || !Subtarget.hasSSE2())
+ return SDValue();
+
+ // There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
+ // v8f64. So all legal 128-bit and 256-bit vectors are covered except for
+ // v8i16 and v16i16.
+ // For these two cases, we can shuffle the upper element bytes to a
+ // consecutive sequence at the start of the vector and treat the results as
+ // v16i8 or v32i8, and for v61i8 this is the prefferable solution. However,
+ // for v16i16 this is not the case, because the shuffle is expensive, so we
+ // avoid sign-exteding to this type entirely.
+ // For example, t0 := (v8i16 sext(v8i1 x)) needs to be shuffled as:
+ // (v16i8 shuffle <0,2,4,6,8,10,12,14,u,u,...,u> (v16i8 bitcast t0), undef)
+ MVT SExtVT;
+ MVT FPCastVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ switch (VecVT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i1:
+ SExtVT = MVT::v2i64;
+ FPCastVT = MVT::v2f64;
+ break;
+ case MVT::v4i1:
+ SExtVT = MVT::v4i32;
+ FPCastVT = MVT::v4f32;
+ // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2))
+ // sign-extend to a 256-bit operation to avoid truncation.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v4i64;
+ FPCastVT = MVT::v4f64;
+ }
+ break;
+ case MVT::v8i1:
+ SExtVT = MVT::v8i16;
+ // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)),
+ // sign-extend to a 256-bit operation to match the compare.
+ // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over
+ // 256-bit because the shuffle is cheaper than sign extending the result of
+ // the compare.
+ if (N0->getOpcode() == ISD::SETCC &&
+ N0->getOperand(0)->getValueType(0).is256BitVector() &&
+ Subtarget.hasInt256()) {
+ SExtVT = MVT::v8i32;
+ FPCastVT = MVT::v8f32;
+ }
+ break;
+ case MVT::v16i1:
+ SExtVT = MVT::v16i8;
+ // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)),
+ // it is not profitable to sign-extend to 256-bit because this will
+ // require an extra cross-lane shuffle which is more exprensive than
+ // truncating the result of the compare to 128-bits.
+ break;
+ case MVT::v32i1:
+ // TODO: Handle pre-AVX2 cases by splitting to two v16i1's.
+ if (!Subtarget.hasInt256())
+ return SDValue();
+ SExtVT = MVT::v32i8;
+ break;
+ };
+
+ SDLoc DL(BitCast);
+ SDValue V = DAG.getSExtOrTrunc(N0, DL, SExtVT);
+ if (SExtVT == MVT::v8i16) {
+ V = DAG.getBitcast(MVT::v16i8, V);
+ V = DAG.getVectorShuffle(
+ MVT::v16i8, DL, V, DAG.getUNDEF(MVT::v16i8),
+ {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
+ } else
+ assert(SExtVT.getScalarType() != MVT::i16 &&
+ "Vectors of i16 must be shuffled");
+ if (FPCastVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
+ V = DAG.getBitcast(FPCastVT, V);
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
+ return DAG.getZExtOrTrunc(V, DL, VT);
+}
+
static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
EVT SrcVT = N0.getValueType();
+ // Try to match patterns such as
+ // (i16 bitcast (v16i1 x))
+ // ->
+ // (i16 movmsk (16i8 sext (v16i1 x)))
+ // before the setcc result is scalarized on subtargets that don't have legal
+ // vxi1 types.
+ if (DCI.isBeforeLegalize())
+ if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
+ return V;
// Since MMX types are special and don't usually play with other vector types,
// it's better to handle them early to be sure we emit efficient code by
// avoiding store-load conversions.
@@ -35079,7 +35185,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
- case ISD::BITCAST: return combineBitcast(N, DAG, Subtarget);
+ case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, Subtarget);
case ISD::SUB: return combineSub(N, DAG, Subtarget);
OpenPOWER on IntegriCloud