diff options
author | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-12-20 20:49:43 +0000 |
---|---|---|
committer | Krzysztof Parzyszek <kparzysz@codeaurora.org> | 2017-12-20 20:49:43 +0000 |
commit | e4ce92cabf7983f0e9a16ebf5d502f529a094db4 (patch) | |
tree | c3b7eb0791e8dc881ee3d812c08bb56eaa24e7cb | |
parent | bb3198949f0d2ab261e2ecaaf1f64ee3be37893b (diff) | |
download | bcm5719-llvm-e4ce92cabf7983f0e9a16ebf5d502f529a094db4.tar.gz bcm5719-llvm-e4ce92cabf7983f0e9a16ebf5d502f529a094db4.zip |
[Hexagon] Allow construction of HVX vector predicates
Handle BUILD_VECTOR of boolean values.
llvm-svn: 321220
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 332 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.h | 12 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 134 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatterns.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonRegisterInfo.td | 5 | ||||
-rw-r--r-- | llvm/lib/Target/Hexagon/HexagonSubtarget.h | 32 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-type.ll | 19 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/isel-bool-vector.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/v60-cur.ll | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/Hexagon/vect/vect-infloop.ll | 10 |
11 files changed, 408 insertions, 170 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index 78336d31d1d..0e0da2ddc40 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1721,8 +1721,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v4i1, &Hexagon::PredRegsRegClass); // ddccbbaa addRegisterClass(MVT::v8i1, &Hexagon::PredRegsRegClass); // hgfedcba addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::v4i8, &Hexagon::IntRegsRegClass); addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); addRegisterClass(MVT::v8i8, &Hexagon::DoubleRegsRegClass); addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass); @@ -1741,6 +1741,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass); addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass); + // These "short" boolean vector types should be legal because + // they will appear as results of vector compares. If they were + // not legal, type legalization would try to make them legal + // and that would require using operations that do not use or + // produce such types. That, in turn, would imply using custom + // nodes, which would be unoptimizable by the DAG combiner. + // The idea is to rely on target-independent operations as much + // as possible. addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass); addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass); @@ -1970,9 +1978,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal); // Types natively supported: - for (MVT NativeVT : {MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v32i1, MVT::v64i1, - MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v1i32, - MVT::v2i32, MVT::v1i64}) { + for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16, + MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) { setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom); @@ -1998,63 +2005,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, AddPromotedToType(Opc, FromTy, ToTy); }; - if (Subtarget.useHVXOps()) { - bool Use64b = Subtarget.useHVX64BOps(); - ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; - ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; - MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; - MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; - - setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); - setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); - setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal); - setOperationAction(ISD::AND, ByteV, Legal); - setOperationAction(ISD::OR, ByteV, Legal); - setOperationAction(ISD::XOR, ByteV, Legal); - - for (MVT T : LegalV) { - setIndexedLoadAction(ISD::POST_INC, T, Legal); - setIndexedStoreAction(ISD::POST_INC, T, Legal); - - setOperationAction(ISD::ADD, T, Legal); - setOperationAction(ISD::SUB, T, Legal); - setOperationAction(ISD::VSELECT, T, Legal); - if (T != ByteV) { - setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); - setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); - } - - setOperationAction(ISD::MUL, T, Custom); - setOperationAction(ISD::SETCC, T, Custom); - setOperationAction(ISD::BUILD_VECTOR, T, Custom); - setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); - setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); - setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); - if (T != ByteV) - setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); - } - - for (MVT T : LegalV) { - if (T == ByteV) - continue; - // Promote all shuffles and concats to operate on vectors of bytes. - setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); - setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV); - setPromoteTo(ISD::AND, T, ByteV); - setPromoteTo(ISD::OR, T, ByteV); - setPromoteTo(ISD::XOR, T, ByteV); - } - - for (MVT T : LegalW) { - if (T == ByteW) - continue; - // Promote all shuffles and concats to operate on vectors of bytes. - setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); - setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW); - } - } - // Subtarget-specific operation actions. // if (Subtarget.hasV5TOps()) { @@ -2116,6 +2066,67 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setIndexedStoreAction(ISD::POST_INC, VT, Legal); } + if (Subtarget.useHVXOps()) { + bool Use64b = Subtarget.useHVX64BOps(); + ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; + ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; + MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; + MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; + + setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal); + setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal); + setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal); + setOperationAction(ISD::AND, ByteV, Legal); + setOperationAction(ISD::OR, ByteV, Legal); + setOperationAction(ISD::XOR, ByteV, Legal); + + for (MVT T : LegalV) { + setIndexedLoadAction(ISD::POST_INC, T, Legal); + setIndexedStoreAction(ISD::POST_INC, T, Legal); + + setOperationAction(ISD::ADD, T, Legal); + setOperationAction(ISD::SUB, T, Legal); + if (T != ByteV) { + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal); + } + + setOperationAction(ISD::MUL, T, Custom); + setOperationAction(ISD::SETCC, T, Custom); + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom); + setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom); + if (T != ByteV) + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom); + } + + for (MVT T : LegalV) { + if (T == ByteV) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV); + setPromoteTo(ISD::AND, T, ByteV); + setPromoteTo(ISD::OR, T, ByteV); + setPromoteTo(ISD::XOR, T, ByteV); + } + + for (MVT T : LegalW) { + // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- + // independent) handling of it would convert it to a load, which is + // not always the optimal choice. + setOperationAction(ISD::BUILD_VECTOR, T, Custom); + + if (T == ByteW) + continue; + // Promote all shuffles and concats to operate on vectors of bytes. + setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); + setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW); + } + } + computeRegisterProperties(&HRI); // @@ -2262,6 +2273,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0"; case HexagonISD::VROR: return "HexagonISD::VROR"; case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE"; + case HexagonISD::VZERO: return "HexagonISD::VZERO"; case HexagonISD::OP_END: break; } return nullptr; @@ -2337,14 +2349,27 @@ bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, TargetLoweringBase::LegalizeTypeAction HexagonTargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() == 1) + return TargetLoweringBase::TypeScalarizeVector; + + // Always widen vectors of i1. + MVT ElemTy = VT.getSimpleVT().getVectorElementType(); + if (ElemTy == MVT::i1) + return TargetLoweringBase::TypeWidenVector; + if (Subtarget.useHVXOps()) { // If the size of VT is at least half of the vector length, // widen the vector. Note: the threshold was not selected in // any scientific way. - if (VT.getSizeInBits() >= Subtarget.getVectorLength()*8/2) - return TargetLoweringBase::TypeWidenVector; + ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes(); + if (llvm::find(Tys, ElemTy) != Tys.end()) { + unsigned HwWidth = 8*Subtarget.getVectorLength(); + unsigned VecWidth = VT.getSizeInBits(); + if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) + return TargetLoweringBase::TypeWidenVector; + } } - return TargetLowering::getPreferredVectorAction(VT); + return TargetLoweringBase::TypeSplitVector; } // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors @@ -2469,21 +2494,43 @@ HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, dl, VT, Result); } +bool +HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values, + MVT VecTy, SelectionDAG &DAG, + MutableArrayRef<ConstantInt*> Consts) const { + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth); + bool AllConst = true; + + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + SDValue V = Values[i]; + if (V.isUndef()) { + Consts[i] = ConstantInt::get(IntTy, 0); + continue; + } + if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) { + const ConstantInt *CI = CN->getConstantIntValue(); + Consts[i] = const_cast<ConstantInt*>(CI); + } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) { + const ConstantFP *CF = CN->getConstantFPValue(); + APInt A = CF->getValueAPF().bitcastToAPInt(); + Consts[i] = ConstantInt::get(IntTy, A.getZExtValue()); + } else { + AllConst = false; + } + } + return AllConst; +} + SDValue HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const { MVT ElemTy = VecTy.getVectorElementType(); assert(VecTy.getVectorNumElements() == Elem.size()); - SmallVector<ConstantSDNode*,4> Consts; - bool AllConst = true; - for (SDValue V : Elem) { - if (isUndef(V)) - V = DAG.getConstant(0, dl, ElemTy); - auto *C = dyn_cast<ConstantSDNode>(V.getNode()); - Consts.push_back(C); - AllConst = AllConst && C != nullptr; - } + SmallVector<ConstantInt*,4> Consts(Elem.size()); + bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts); unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) @@ -2492,6 +2539,10 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, if (First == Num) return DAG.getUNDEF(VecTy); + if (AllConst && + llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + if (ElemTy == MVT::i16) { assert(Elem.size() == 2); if (AllConst) { @@ -2504,49 +2555,55 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, return DAG.getBitcast(MVT::v2i16, N); } - // First try generating a constant. - assert(ElemTy == MVT::i8 && Num == 4); - if (AllConst) { - int32_t V = (Consts[0]->getZExtValue() & 0xFF) | - (Consts[1]->getZExtValue() & 0xFF) << 8 | - (Consts[1]->getZExtValue() & 0xFF) << 16 | - Consts[2]->getZExtValue() << 24; - return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32)); - } + if (ElemTy == MVT::i8) { + // First try generating a constant. + if (AllConst) { + int32_t V = (Consts[0]->getZExtValue() & 0xFF) | + (Consts[1]->getZExtValue() & 0xFF) << 8 | + (Consts[1]->getZExtValue() & 0xFF) << 16 | + Consts[2]->getZExtValue() << 24; + return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32)); + } - // Then try splat. - bool IsSplat = true; - for (unsigned i = 0; i != Num; ++i) { - if (i == First) - continue; - if (Elem[i] == Elem[First] || isUndef(Elem[i])) - continue; - IsSplat = false; - break; - } - if (IsSplat) { - // Legalize the operand to VSPLAT. - SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32); - return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext); - } + // Then try splat. + bool IsSplat = true; + for (unsigned i = 0; i != Num; ++i) { + if (i == First) + continue; + if (Elem[i] == Elem[First] || isUndef(Elem[i])) + continue; + IsSplat = false; + break; + } + if (IsSplat) { + // Legalize the operand to VSPLAT. + SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32); + return DAG.getNode(HexagonISD::VSPLAT, dl, VecTy, Ext); + } + + // Generate + // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) | + // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16 + assert(Elem.size() == 4); + SDValue Vs[4]; + for (unsigned i = 0; i != 4; ++i) { + Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32); + Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8); + } + SDValue S8 = DAG.getConstant(8, dl, MVT::i32); + SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8}); + SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8}); + SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0}); + SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1}); - // Generate - // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) | - // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16 - assert(Elem.size() == 4); - SDValue Vs[4]; - for (unsigned i = 0; i != 4; ++i) { - Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32); - Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8); + SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG); + return DAG.getBitcast(MVT::v4i8, R); } - SDValue S8 = DAG.getConstant(8, dl, MVT::i32); - SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8}); - SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8}); - SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0}); - SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1}); - SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG); - return DAG.getBitcast(MVT::v4i8, R); +#ifndef NDEBUG + dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n'; +#endif + llvm_unreachable("Unexpected vector element type"); } SDValue @@ -2555,15 +2612,8 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT ElemTy = VecTy.getVectorElementType(); assert(VecTy.getVectorNumElements() == Elem.size()); - SmallVector<ConstantSDNode*,8> Consts; - bool AllConst = true; - for (SDValue V : Elem) { - if (isUndef(V)) - V = DAG.getConstant(0, dl, ElemTy); - auto *C = dyn_cast<ConstantSDNode>(V.getNode()); - Consts.push_back(C); - AllConst = AllConst && C != nullptr; - } + SmallVector<ConstantInt*,8> Consts(Elem.size()); + bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts); unsigned First, Num = Elem.size(); for (First = 0; First != Num; ++First) @@ -2572,6 +2622,10 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, if (First == Num) return DAG.getUNDEF(VecTy); + if (AllConst && + llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + // First try splat if possible. if (ElemTy == MVT::i16) { bool IsSplat = true; @@ -2606,10 +2660,10 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2); SDValue L = (ElemTy == MVT::i32) ? Elem[0] - : buildVector32({Elem.data(), Num/2}, dl, HalfTy, DAG); + : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG); SDValue H = (ElemTy == MVT::i32) ? Elem[1] - : buildVector32({Elem.data()+Num/2, Num/2}, dl, HalfTy, DAG); + : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG); return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L}); } @@ -2709,21 +2763,41 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV, } SDValue +HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) + const { + if (Ty.isVector()) { + assert(Ty.isInteger() && "Only integer vectors are supported here"); + unsigned W = Ty.getSizeInBits(); + if (W <= 64) + return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W))); + return DAG.getNode(HexagonISD::VZERO, dl, Ty); + } + + if (Ty.isInteger()) + return DAG.getConstant(0, dl, Ty); + if (Ty.isFloatingPoint()) + return DAG.getConstantFP(0.0, dl, Ty); + llvm_unreachable("Invalid type for zero"); +} + +SDValue HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { MVT VecTy = ty(Op); unsigned BW = VecTy.getSizeInBits(); + + if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true)) + return LowerHvxBuildVector(Op, DAG); + if (BW == 32 || BW == 64) { + const SDLoc &dl(Op); SmallVector<SDValue,8> Ops; for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) Ops.push_back(Op.getOperand(i)); if (BW == 32) - return buildVector32(Ops, SDLoc(Op), VecTy, DAG); - return buildVector64(Ops, SDLoc(Op), VecTy, DAG); + return buildVector32(Ops, dl, VecTy, DAG); + return buildVector64(Ops, dl, VecTy, DAG); } - if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy)) - return LowerHvxBuildVector(Op, DAG); - return SDValue(); } @@ -2835,7 +2909,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { #ifndef NDEBUG Op.getNode()->dumpr(&DAG); if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END) - errs() << "Check for a non-legal type in this operation\n"; + errs() << "Error: check for a non-legal type in this operation\n"; #endif llvm_unreachable("Should not custom lower this!"); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 0619e2e4e7f..732834b464b 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -70,6 +70,7 @@ namespace HexagonISD { EH_RETURN, DCFETCH, READCYCLE, + VZERO, OP_END }; @@ -283,6 +284,9 @@ namespace HexagonISD { } private: + bool getBuildVectorConstInts(ArrayRef<SDValue> Values, MVT VecTy, + SelectionDAG &DAG, + MutableArrayRef<ConstantInt*> Consts) const; SDValue buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy, SelectionDAG &DAG) const; SDValue buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl, MVT VecTy, @@ -301,6 +305,7 @@ namespace HexagonISD { SDNode *N = DAG.getMachineNode(MachineOpc, dl, Ty, Ops); return SDValue(N, 0); } + SDValue getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG) const; using VectorPair = std::pair<SDValue, SDValue>; using TypePair = std::pair<MVT, MVT>; @@ -344,6 +349,13 @@ namespace HexagonISD { SDValue getByteShuffle(const SDLoc &dl, SDValue Op0, SDValue Op1, ArrayRef<int> Mask, SelectionDAG &DAG) const; + MVT getVecBoolVT() const; + + SDValue buildHvxVectorSingle(ArrayRef<SDValue> Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue buildHvxVectorPred(ArrayRef<SDValue> Values, const SDLoc &dl, + MVT VecTy, SelectionDAG &DAG) const; + SDValue LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index c1d44cb0e7d..51480d09d73 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -141,49 +141,50 @@ HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, opCastElem(Op1, MVT::i8, DAG), ByteMask); } +MVT +HexagonTargetLowering::getVecBoolVT() const { + return MVT::getVectorVT(MVT::i1, 8*Subtarget.getVectorLength()); +} + SDValue -HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) - const { - const SDLoc &dl(Op); - BuildVectorSDNode *BN = cast<BuildVectorSDNode>(Op.getNode()); - bool IsConst = BN->isConstant(); +HexagonTargetLowering::buildHvxVectorSingle(ArrayRef<SDValue> Values, + const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const { + unsigned VecLen = Values.size(); MachineFunction &MF = DAG.getMachineFunction(); - MVT VecTy = ty(Op); + MVT ElemTy = VecTy.getVectorElementType(); + unsigned ElemWidth = ElemTy.getSizeInBits(); + unsigned HwLen = Subtarget.getVectorLength(); - if (IsConst) { - SmallVector<Constant*, 128> Elems; - for (SDValue V : BN->op_values()) { - if (auto *C = dyn_cast<ConstantSDNode>(V.getNode())) - Elems.push_back(const_cast<ConstantInt*>(C->getConstantIntValue())); - } - Constant *CV = ConstantVector::get(Elems); - unsigned Align = VecTy.getSizeInBits() / 8; + SmallVector<ConstantInt*, 128> Consts(VecLen); + bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); + if (AllConst) { + if (llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); })) + return getZero(dl, VecTy, DAG); + + ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), + (Constant**)Consts.end()); + Constant *CV = ConstantVector::get(Tmp); + unsigned Align = HwLen; SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG); return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(MF), Align); } - unsigned NumOps = Op.getNumOperands(); - unsigned HwLen = Subtarget.getVectorLength(); - unsigned ElemSize = VecTy.getVectorElementType().getSizeInBits() / 8; - assert(ElemSize*NumOps == HwLen); - + unsigned ElemSize = ElemWidth / 8; + assert(ElemSize*VecLen == HwLen); SmallVector<SDValue,32> Words; - SmallVector<SDValue,32> Ops; - for (unsigned i = 0; i != NumOps; ++i) - Ops.push_back(Op.getOperand(i)); if (VecTy.getVectorElementType() != MVT::i32) { - assert(ElemSize < 4 && "vNi64 should have been promoted to vNi32"); assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord); - for (unsigned i = 0; i != NumOps; i += OpsPerWord) { - SDValue W = buildVector32({&Ops[i], OpsPerWord}, dl, PartVT, DAG); + for (unsigned i = 0; i != VecLen; i += OpsPerWord) { + SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG); Words.push_back(DAG.getBitcast(MVT::i32, W)); } } else { - Words.assign(Ops.begin(), Ops.end()); + Words.assign(Values.begin(), Values.end()); } // Construct two halves in parallel, then or them together. @@ -208,6 +209,83 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) } SDValue +HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, + const SDLoc &dl, MVT VecTy, + SelectionDAG &DAG) const { + // Construct a vector V of bytes, such that a comparison V >u 0 would + // produce the required vector predicate. + unsigned VecLen = Values.size(); + unsigned HwLen = Subtarget.getVectorLength(); + assert(VecLen <= HwLen || VecLen == 8*HwLen); + SmallVector<SDValue,128> Bytes; + + if (VecLen <= HwLen) { + // In the hardware, each bit of a vector predicate corresponds to a byte + // of a vector register. Calculate how many bytes does a bit of VecTy + // correspond to. + assert(HwLen % VecLen == 0); + unsigned BitBytes = HwLen / VecLen; + for (SDValue V : Values) { + SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8) + : DAG.getConstant(0, dl, MVT::i8); + for (unsigned B = 0; B != BitBytes; ++B) + Bytes.push_back(Ext); + } + } else { + // There are as many i1 values, as there are bits in a vector register. + // Divide the values into groups of 8 and check that each group consists + // of the same value (ignoring undefs). + for (unsigned I = 0; I != VecLen; I += 8) { + unsigned B = 0; + // Find the first non-undef value in this group. + for (; B != 8; ++B) { + if (!Values[I+B].isUndef()) + break; + } + SDValue F = Values[I+B]; + SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8) + : DAG.getConstant(0, dl, MVT::i8); + Bytes.push_back(Ext); + // Verify that the rest of values in the group are the same as the + // first. + for (; B != 8; ++B) + assert(Values[I+B].isUndef() || Values[I+B] == F); + } + } + + MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen); + SDValue ByteVec = buildHvxVectorSingle(Bytes, dl, ByteTy, DAG); + SDValue Cmp = DAG.getSetCC(dl, VecTy, ByteVec, getZero(dl, ByteTy, DAG), + ISD::SETUGT); + return Cmp; +} + +SDValue +HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) + const { + const SDLoc &dl(Op); + MVT VecTy = ty(Op); + + unsigned Size = Op.getNumOperands(); + SmallVector<SDValue,128> Ops; + for (unsigned i = 0; i != Size; ++i) + Ops.push_back(Op.getOperand(i)); + + if (VecTy.getVectorElementType() == MVT::i1) + return buildHvxVectorPred(Ops, dl, VecTy, DAG); + + if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { + ArrayRef<SDValue> A(Ops); + MVT SingleTy = typeSplit(VecTy).first; + SDValue V0 = buildHvxVectorSingle(A.take_front(Size/2), dl, SingleTy, DAG); + SDValue V1 = buildHvxVectorSingle(A.drop_front(Size/2), dl, SingleTy, DAG); + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1); + } + + return buildHvxVectorSingle(Ops, dl, VecTy, DAG); +} + +SDValue HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG) const { // Change the type of the extracted element to i32. @@ -399,6 +477,10 @@ HexagonTargetLowering::LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const { // (negate (swap-op NewCmp)), // the condition code for the NewCmp should be calculated from the original // CC by applying these operations in the reverse order. + // + // This could also be done through setCondCodeAction, but for negation it + // uses a xor with a vector of -1s, which it obtains from BUILD_VECTOR. + // That is far too expensive for what can be done with a single instruction. switch (CC) { case ISD::SETNE: // !eq diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index e2120d3de2e..cdc2085986a 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -2899,6 +2899,8 @@ def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; +def SDTVecLeaf: SDTypeProfile<1, 0, [SDTCisVec<0>]>; + def SDTHexagonVEXTRACTW: SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisVT<2, i32>]>; def HexagonVEXTRACTW : SDNode<"HexagonISD::VEXTRACTW", SDTHexagonVEXTRACTW>; @@ -2920,7 +2922,14 @@ let Predicates = [UseHVX] in { def: OpR_RR_pat<V6_vpackoh, pf2<HexagonVPACKO>, VecI16, HVI16>; } +def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>; +def vzero: PatFrag<(ops), (HexagonVZERO)>; + let Predicates = [UseHVX] in { + def: Pat<(VecI8 vzero), (V6_vd0)>; + def: Pat<(VecI16 vzero), (V6_vd0)>; + def: Pat<(VecI32 vzero), (V6_vd0)>; + def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)), (Combinev HvxVR:$Vt, HvxVR:$Vs)>; def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td index 2ceed70c249..1d1e85e7ac7 100644 --- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -242,7 +242,7 @@ def VecQ32 // FIXME: the register order should be defined in terms of the preferred // allocation order... // -def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, +def IntRegs : RegisterClass<"Hexagon", [i32, f32, v32i1, v4i8, v2i16], 32, (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10, R11, R29, R30, R31)>; @@ -254,7 +254,8 @@ def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32, def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, (add R7, R6, R5, R4, R3, R2, R1, R0)> ; -def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, +def DoubleRegs : RegisterClass<"Hexagon", + [i64, f64, v64i1, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64, diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 678ef210d0a..0b4333c1bfc 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -204,14 +204,38 @@ public: llvm_unreachable("Invalid HVX vector length settings"); } - bool isHVXVectorType(MVT VecTy) const { + ArrayRef<MVT> getHVXElementTypes() const { + static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 }; + return Types; + } + + bool isHVXVectorType(MVT VecTy, bool IncludeBool = false) const { if (!VecTy.isVector() || !useHVXOps()) return false; - unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits(); - if (ElemWidth < 8 || ElemWidth > 64) + MVT ElemTy = VecTy.getVectorElementType(); + if (!IncludeBool && ElemTy == MVT::i1) + return false; + + unsigned HwLen = getVectorLength(); + unsigned NumElems = VecTy.getVectorNumElements(); + ArrayRef<MVT> ElemTypes = getHVXElementTypes(); + + if (IncludeBool && ElemTy == MVT::i1) { + // Special case for the v512i1, etc. + if (8*HwLen == NumElems) + return true; + // Boolean HVX vector types are formed from regular HVX vector types + // by replacing the element type with i1. + for (MVT T : ElemTypes) + if (NumElems * T.getSizeInBits() == 8*HwLen) + return true; return false; + } + unsigned VecWidth = VecTy.getSizeInBits(); - return VecWidth == 8*getVectorLength() || VecWidth == 16*getVectorLength(); + if (VecWidth != 8*HwLen && VecWidth != 16*HwLen) + return false; + return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; }); } unsigned getL1CacheLineSize() const; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-type.ll b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-type.ll new file mode 100644 index 00000000000..f96dbf2af49 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/build-vector-i32-type.ll @@ -0,0 +1,19 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this doesn't crash. +; CHECK: sfcmp + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @fred() #0 { +b0: + %v1 = load <16 x float>, <16 x float>* null, align 8 + %v2 = fcmp olt <16 x float> undef, %v1 + %v3 = select <16 x i1> %v2, <16 x i16> undef, <16 x i16> zeroinitializer + %v4 = sext <16 x i16> %v3 to <16 x i32> + store <16 x i32> %v4, <16 x i32>* undef, align 64 + unreachable +} + +attributes #0 = { noinline norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b,+hvxv60" } diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-bool-vector.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-bool-vector.ll new file mode 100644 index 00000000000..4cbd00837fc --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-bool-vector.ll @@ -0,0 +1,18 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; Check that this testcase doesn't crash. +; CHECK: sfcmp + +target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048" +target triple = "hexagon" + +define void @fred() #0 { +b0: + %v1 = fcmp olt <16 x float> zeroinitializer, undef + %v2 = select <16 x i1> %v1, <16 x i16> undef, <16 x i16> zeroinitializer + %v3 = sext <16 x i16> %v2 to <16 x i32> + store <16 x i32> %v3, <16 x i32>* undef, align 128 + unreachable +} + +attributes #0 = { noinline norecurse nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length64b" } diff --git a/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll b/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll index 88eaec938fd..5ac0f59bd2d 100644 --- a/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll +++ b/llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: fred: ; CHECK: v[[REG:[0-9]+]] = vsplat -; CHECK: vmem(r29+#6) = v[[REG]] +; CHECK: vmem(r29+#{{[0-9]+}}) = v[[REG]] target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/v60-cur.ll b/llvm/test/CodeGen/Hexagon/v60-cur.ll index 26d40c9a697..d0ffe1d8fdd 100644 --- a/llvm/test/CodeGen/Hexagon/v60-cur.ll +++ b/llvm/test/CodeGen/Hexagon/v60-cur.ll @@ -1,9 +1,8 @@ -; RUN: llc -march=hexagon -enable-pipeliner=false < %s | FileCheck %s +; RUN: llc -march=hexagon < %s | FileCheck %s ; Test that we generate a .cur -; CHECK: v{{[0-9]*}}.cur{{ *}} -; CHECK: v{{[0-9]*}}.cur{{ *}} +; CHECK: v{{[0-9]*}}.cur define void @conv3x3_i(i8* noalias nocapture readonly %iptr0, i32 %shift, i32 %width) #0 { entry: diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-infloop.ll b/llvm/test/CodeGen/Hexagon/vect/vect-infloop.ll index 4de390159fd..9ee0b0ab3aa 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-infloop.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-infloop.ll @@ -1,10 +1,10 @@ ; Extracted from test/CodeGen/Generic/vector-casts.ll: used to loop indefinitely. ; RUN: llc -march=hexagon < %s | FileCheck %s -; CHECK: combine +; CHECK: convert_df2w define void @a(<2 x double>* %p, <2 x i8>* %q) { - %t = load <2 x double>, <2 x double>* %p - %r = fptosi <2 x double> %t to <2 x i8> - store <2 x i8> %r, <2 x i8>* %q - ret void + %t = load <2 x double>, <2 x double>* %p + %r = fptosi <2 x double> %t to <2 x i8> + store <2 x i8> %r, <2 x i8>* %q + ret void } |