diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 113 |
1 file changed, 113 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a5e65f83936..121542fb1bc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17274,6 +17274,24 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, DAG.getConstant(CmpMode, dl, MVT::i8)); } + // (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2. + // Revert part of the simplifySetCCWithAnd combine, to avoid an invert. + if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) { + SDValue BC0 = peekThroughBitcasts(Op0); + if (BC0.getOpcode() == ISD::AND) { + APInt UndefElts; + SmallVector<APInt, 64> EltBits; + if (getTargetConstantBitsFromNode(BC0.getOperand(1), + VT.getScalarSizeInBits(), UndefElts, + EltBits, false, false)) { + if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) { + Cond = ISD::SETEQ; + Op1 = DAG.getBitcast(VT, BC0.getOperand(1)); + } + } + } + } + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. @@ -34480,6 +34498,95 @@ static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { CMovN.getOperand(2), CMovN.getOperand(3)); } +// Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). +// This is more or less the reverse of combineBitcastvxi1. 
+static SDValue +combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + unsigned Opcode = N->getOpcode(); + if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND && + Opcode != ISD::ANY_EXTEND) + return SDValue(); + if (!DCI.isBeforeLegalizeOps()) + return SDValue(); + if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) + return SDValue(); + + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT SVT = VT.getScalarType(); + EVT InSVT = N0.getValueType().getScalarType(); + unsigned EltSizeInBits = SVT.getSizeInBits(); + + // Input type must be extending a bool vector (bit-casted from a scalar + // integer) to legal integer types. + if (!VT.isVector()) + return SDValue(); + if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8) + return SDValue(); + if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + EVT SclVT = N0.getOperand(0).getValueType(); + if (!SclVT.isScalarInteger()) + return SDValue(); + + SDLoc DL(N); + SDValue Vec; + SmallVector<int, 32> ShuffleMask; + unsigned NumElts = VT.getVectorNumElements(); + assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size"); + + // Broadcast the scalar integer to the vector elements. + if (NumElts > EltSizeInBits) { + // If the scalar integer is greater than the vector element size, then we + // must split it down into sub-sections for broadcasting. For example: + // i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. + // i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. 
+ assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); + unsigned Scale = NumElts / EltSizeInBits; + EVT BroadcastVT = + EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); + Vec = DAG.getBitcast(VT, Vec); + + for (unsigned i = 0; i != Scale; ++i) + ShuffleMask.append(EltSizeInBits, i); + } else { + // For smaller scalar integers, we can simply any-extend it to the vector + // element size (we don't care about the upper bits) and broadcast it to all + // elements. + SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT); + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); + ShuffleMask.append(NumElts, 0); + } + Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); + + // Now, mask the relevant bit in each element. + SmallVector<SDValue, 32> Bits; + for (int i = 0; i != NumElts; ++i) { + int BitIdx = (i % EltSizeInBits); + APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1); + Bits.push_back(DAG.getConstant(Bit, DL, SVT)); + } + SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); + Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); + + // Compare against the bitmask and extend the result. + EVT CCVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); + Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ); + Vec = DAG.getSExtOrTrunc(Vec, DL, VT); + + // For SEXT, this is now done, otherwise shift the result down for + // zero-extension. 
+ if (Opcode == ISD::SIGN_EXTEND) + return Vec; + return DAG.getNode(ISD::SRL, DL, VT, Vec, + DAG.getConstant(EltSizeInBits - 1, DL, VT)); +} + /// Convert a SEXT or ZEXT of a vector to a SIGN_EXTEND_VECTOR_INREG or /// ZERO_EXTEND_VECTOR_INREG, this requires the splitting (or concatenating /// with UNDEFs) of the input to vectors of the same size as the target type @@ -34619,6 +34726,9 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; + if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) + return V; + if (Subtarget.hasAVX() && VT.is256BitVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; @@ -34755,6 +34865,9 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineToExtendVectorInReg(N, DAG, DCI, Subtarget)) return V; + if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) + return V; + if (VT.is256BitVector()) if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget)) return R; |