summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp142
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td4
2 files changed, 123 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 446098a0711..b1bafba047c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1509,29 +1509,49 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v16i32, Legal);
if (Subtarget->hasCDI()) {
- setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Legal);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Custom);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
- }
- if (Subtarget->hasVLX() && Subtarget->hasCDI()) {
- setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
- setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
- setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
- setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Legal);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Legal);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Legal);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Legal);
-
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
- }
+
+ if (Subtarget->hasVLX()) {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Legal);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Legal);
+
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ } else {
+ setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Custom);
+ }
+ } // Subtarget->hasCDI()
+
if (Subtarget->hasDQI()) {
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i64, Legal);
@@ -1605,6 +1625,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Legal);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Legal);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom);
@@ -1646,6 +1668,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget->hasVLX())
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+ if (Subtarget->hasCDI()) {
+ setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Custom);
+ }
+
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;
@@ -17501,12 +17530,75 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal);
}
-static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
+/// \brief Lower a vector CTLZ using native supported vector CTLZ instruction.
+//
+// 1. i32/i64 128/256-bit vector (native support require VLX) are expended
+// to 512-bit vector.
+// 2. i8/i16 vector implemented using dword LZCNT vector instruction
+// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal,
+// split the vector, perform operation on it's Lo a Hi part and
+// concatenate the results.
+static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) {
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ if (EltVT == MVT::i64 || EltVT == MVT::i32) {
+ // Extend to 512 bit vector.
+ assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ "Unsupported value type for operation");
+
+ MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits());
+ SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT,
+ DAG.getUNDEF(NewVT),
+ Op.getOperand(0),
+ DAG.getIntPtrConstant(0, dl));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
+ "Unsupported element type");
+
+ if (16 < NumElems) {
+ // Split vector, it's Lo and Hi parts will be handled in next iteration.
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl);
+ MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2);
+
+ Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
+ }
+
+ MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
+
+ assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
+ "Unsupported value type for operation");
+
+ // Use native supported vector instruction vplzcntd.
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
+ SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
+ SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
+ SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);
+
+ return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
+}
+
+static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
+ if (VT.isVector() && Subtarget->hasAVX512())
+ return LowerVectorCTLZ_AVX512(Op, DAG);
+
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
@@ -17536,12 +17628,16 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
return Op;
}
-static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
EVT OpVT = VT;
unsigned NumBits = VT.getSizeInBits();
SDLoc dl(Op);
+ if (VT.isVector() && Subtarget->hasAVX512())
+ return LowerVectorCTLZ_AVX512(Op, DAG);
+
Op = Op.getOperand(0);
if (VT == MVT::i8) {
// Zero extend to i32 since there is not an i8 bsr.
@@ -19554,8 +19650,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
- case ISD::CTLZ: return LowerCTLZ(Op, DAG);
- case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
+ case ISD::CTLZ: return LowerCTLZ(Op, Subtarget, DAG);
+ case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG);
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8a22e2e842d..31a7e95df15 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -786,6 +786,10 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)),
(INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
+def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)),
+ (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>;
// vextractps - extract 32 bits from XMM
def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
OpenPOWER on IntegriCloud