diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-01-11 05:44:56 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-01-11 05:44:56 +0000 |
| commit | b97885cc2eb06a29141b6ea67413f8ff17afcc10 (patch) | |
| tree | 4efe991c68744b4db80a1bd1e47ee6359bfe9361 /llvm/lib/Target | |
| parent | d458ceda24fde3bc0f2f910b80e3bd9f990af2fa (diff) | |
| download | bcm5719-llvm-b97885cc2eb06a29141b6ea67413f8ff17afcc10.tar.gz bcm5719-llvm-b97885cc2eb06a29141b6ea67413f8ff17afcc10.zip | |
[X86] Change vXi1 extract_vector_elt lowering to be legal if the index is 0. Add DAG combine to turn scalar_to_vector+extract_vector_elt into extract_subvector.
Previously, the last step of the vXi1 extract_vector_elt lowering was emitted as a bitcast+truncate. Change it to emit an extract_vector_elt of index 0 instead, and add isel patterns that do the equivalent of what the bitcast would have done. Also add an isel pattern for an any_extend+extract to prevent some regressions.
Finally, add a DAG combine to turn a v1i1 scalar_to_vector of an extract_vector_elt at index 0 (from a vXi1 vector) into an extract_subvector.
This fixes some of the regressions from D350800.
llvm-svn: 350918
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 46 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 11 |
2 files changed, 34 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index def4be5afb5..c8ed78d0b4e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16296,34 +16296,25 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, } unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0) // the operation is legal + return Op; - // If the kshift instructions of the correct width aren't natively supported - // then we need to promote the vector to the native size to get the correct - // zeroing behavior. - if (VecVT.getVectorNumElements() < 16) { - VecVT = MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, - DAG.getUNDEF(VecVT), Vec, + // Extend to natively supported kshift. + unsigned NumElems = VecVT.getVectorNumElements(); + MVT WideVecVT = VecVT; + if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { + WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, + DAG.getUNDEF(WideVecVT), Vec, DAG.getIntPtrConstant(0, dl)); } - // Extracts from element 0 are always allowed. - if (IdxVal != 0) { - // Use kshiftr instruction to move to the lower element. - Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); - } - - // Shrink to v16i1 since that's always legal. - if (VecVT.getVectorNumElements() > 16) { - VecVT = MVT::v16i1; - Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec, - DAG.getIntPtrConstant(0, dl)); - } + // Use kshiftr instruction to move to the lower element. + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); - // Convert to a bitcast+aext/trunc. 
- MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements()); - return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, + DAG.getIntPtrConstant(0, dl)); } SDValue @@ -41413,6 +41404,15 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1, Src.getOperand(0)); + // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec. + if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() && + Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1) + if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) + if (C->isNullValue()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, + Src.getOperand(0), Src.getOperand(1)); + return SDValue(); } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 105ca2e87d7..97c9eac1cf7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2889,6 +2889,11 @@ let Predicates = [HasAVX512] in { (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>; } +def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDTypeProfile<1, 2, [SDTCisVT<0, i8>, + SDTCVecEltisVT<1, i1>, + SDTCisPtrTy<2>]>>; + let Predicates = [HasAVX512] in { multiclass operation_gpr_mask_copy_lowering<RegisterClass maskRC, ValueType maskVT> { def : Pat<(maskVT (scalar_to_vector GR32:$src)), @@ -2896,6 +2901,12 @@ let Predicates = [HasAVX512] in { def : Pat<(maskVT (scalar_to_vector GR8:$src)), (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; + + def : Pat<(i8 (X86kextract maskRC:$src, (iPTR 0))), + (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS maskRC:$src, GR32)), sub_8bit)>; + + def : Pat<(i32 (anyext (i8 (X86kextract maskRC:$src, (iPTR 0))))), + (i32 (COPY_TO_REGCLASS maskRC:$src, GR32))>; } 
defm : operation_gpr_mask_copy_lowering<VK1, v1i1>; |

