summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp113
1 files changed, 113 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a5e65f83936..121542fb1bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17274,6 +17274,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
+ // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
+ // Revert part of the simplifySetCCWithAnd combine, to avoid an invert.
+ if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) {
+ SDValue BC0 = peekThroughBitcasts(Op0);
+ if (BC0.getOpcode() == ISD::AND) {
+ APInt UndefElts;
+ SmallVector<APInt, 64> EltBits;
+ if (getTargetConstantBitsFromNode(BC0.getOperand(1),
+ VT.getScalarSizeInBits(), UndefElts,
+ EltBits, false, false)) {
+ if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) {
+ Cond = ISD::SETEQ;
+ Op1 = DAG.getBitcast(VT, BC0.getOperand(1));
+ }
+ }
+ }
+ }
+
// We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
@@ -34480,6 +34498,95 @@ static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) {
CMovN.getOperand(2), CMovN.getOperand(3));
}
+// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)).
+// This is more or less the reverse of combineBitcastvxi1.
+static SDValue
+combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
+ Opcode != ISD::ANY_EXTEND)
+ return SDValue();
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+ if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SVT = VT.getScalarType();
+ EVT InSVT = N0.getValueType().getScalarType();
+ unsigned EltSizeInBits = SVT.getSizeInBits();
+
+ // Input type must be extending a bool vector (bit-casted from a scalar
+ // integer) to legal integer types.
+ if (!VT.isVector())
+ return SDValue();
+ if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
+ return SDValue();
+ if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ SDValue N00 = N0.getOperand(0);
+ EVT SclVT = N0.getOperand(0).getValueType();
+ if (!SclVT.isScalarInteger())
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Vec;
+ SmallVector<int, 32> ShuffleMask;
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");
+
+ // Broadcast the scalar integer to the vector elements.
+ if (NumElts > EltSizeInBits) {
+ // If the scalar integer is greater than the vector element size, then we
+ // must split it down into sub-sections for broadcasting. For example:
+ // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections.
+ // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections.
+ assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
+ unsigned Scale = NumElts / EltSizeInBits;
+ EVT BroadcastVT =
+ EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
+ Vec = DAG.getBitcast(VT, Vec);
+
+ for (unsigned i = 0; i != Scale; ++i)
+ ShuffleMask.append(EltSizeInBits, i);
+ } else {
+ // For smaller scalar integers, we can simply any-extend it to the vector
+ // element size (we don't care about the upper bits) and broadcast it to all
+ // elements.
+ SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
+ Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
+ ShuffleMask.append(NumElts, 0);
+ }
+ Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
+
+ // Now, mask the relevant bit in each element.
+ SmallVector<SDValue, 32> Bits;
+ for (int i = 0; i != NumElts; ++i) {
+ int BitIdx = (i % EltSizeInBits);
+ APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
+ Bits.push_back(DAG.getConstant(Bit, DL, SVT));
+ }
+ SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
+ Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);
+
+ // Compare against the bitmask and extend the result.
+ EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
+ Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
+ Vec = DAG.getSExtOrTrunc(Vec, DL, VT);
+
+ // For SEXT, this is now done, otherwise shift the result down for
+ // zero-extension.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return Vec;
+ return DAG.getNode(ISD::SRL, DL, VT, Vec,
+ DAG.getConstant(EltSizeInBits - 1, DL, VT));
+}
+
/// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or
/// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating
/// with UNDEFs) of the input to vectors of the same size as the target type
@@ -34619,6 +34726,9 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
+ if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+ return V;
+
if (Subtarget.hasAVX() && VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
@@ -34755,6 +34865,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget))
return V;
+ if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
+ return V;
+
if (VT.is256BitVector())
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
OpenPOWER on IntegriCloud