diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 52 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 25 |
2 files changed, 38 insertions, 39 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fb9c177325a..5dd2e520c45 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1352,8 +1352,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64); if (Subtarget.hasCDI()) { + // NonVLX sub-targets extend 128/256 vectors to use the 512 version. setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); + setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); + setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); @@ -1362,23 +1367,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); - - if (Subtarget.hasVLX()) { - setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); - setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); - setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); - setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); - } else { - setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); - setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); - setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); - } - - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, 
MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); } // Subtarget.hasCDI() if (Subtarget.hasDQI()) { @@ -20981,12 +20973,10 @@ static SDValue Lower512IntUnary(SDValue Op, SelectionDAG &DAG) { /// \brief Lower a vector CTLZ using native supported vector CTLZ instruction. // -// 1. i32/i64 128/256-bit vector (native support require VLX) are expended -// to 512-bit vector. -// 2. i8/i16 vector implemented using dword LZCNT vector instruction -// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, -// split the vector, perform operation on it's Lo a Hi part and -// concatenate the results. +// i8/i16 vector implemented using dword LZCNT vector instruction +// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, +// split the vector, perform operation on its Lo and Hi parts and +// concatenate the results. static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { assert(Op.getOpcode() == ISD::CTLZ); SDLoc dl(Op); @@ -20994,22 +20984,6 @@ static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { MVT EltVT = VT.getVectorElementType(); unsigned NumElems = VT.getVectorNumElements(); - if (EltVT == MVT::i64 || EltVT == MVT::i32) { - // Extend to 512 bit vector.
- assert((VT.is256BitVector() || VT.is128BitVector()) && - "Unsupported value type for operation"); - - MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits()); - SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, - DAG.getUNDEF(NewVT), - Op.getOperand(0), - DAG.getIntPtrConstant(0, dl)); - SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512); - - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode, - DAG.getIntPtrConstant(0, dl)); - } - assert((EltVT == MVT::i8 || EltVT == MVT::i16) && "Unsupported element type"); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index c38c13bb975..91eff70ac6a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8639,6 +8639,31 @@ multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{ defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; +// VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. +let Predicates = [HasCDI, NoVLX] in { + def : Pat<(v4i64 (ctlz VR256X:$src)), + (EXTRACT_SUBREG + (VPLZCNTQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v2i64 (ctlz VR128X:$src)), + (EXTRACT_SUBREG + (VPLZCNTQZrr + (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; + + def : Pat<(v8i32 (ctlz VR256X:$src)), + (EXTRACT_SUBREG + (VPLZCNTDZrr + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)), + sub_ymm)>; + def : Pat<(v4i32 (ctlz VR128X:$src)), + (EXTRACT_SUBREG + (VPLZCNTDZrr + (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm)), + sub_xmm)>; +} + //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// |

