-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp                   |  64
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h                     |   3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp                |  15
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp                   | 111
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.h                     |   6
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp                |  60
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp                      |   5
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatterns.td                        | 131
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsHVX.td                     |  10
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.h                        |   6
-rw-r--r--  llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll  |   2
-rw-r--r--  llvm/test/CodeGen/Hexagon/select-instr-align.ll                   |   2
-rw-r--r--  llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll                 |   2
-rw-r--r--  llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll                      |   4
14 files changed, 294 insertions, 127 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 54e501a6638..3540cf06b9c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -93,12 +93,16 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
     Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
     break;
   case MVT::i32:
+  case MVT::v2i16:
+  case MVT::v4i8:
     Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
     break;
   case MVT::i64:
+  case MVT::v2i32:
+  case MVT::v4i16:
+  case MVT::v8i8:
     Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
     break;
-  // 64B
   case MVT::v64i8:
   case MVT::v32i16:
   case MVT::v16i32:
@@ -377,9 +381,14 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
     Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
     break;
   case MVT::i32:
+  case MVT::v2i16:
+  case MVT::v4i8:
     Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
     break;
   case MVT::i64:
+  case MVT::v2i32:
+  case MVT::v4i16:
+  case MVT::v8i8:
     Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
     break;
   case MVT::v64i8:
@@ -657,6 +666,57 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {
   CurDAG->RemoveDeadNode(N);
 }
 
+void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
+  MVT ResTy = N->getValueType(0).getSimpleVT();
+  if (HST->isHVXVectorType(ResTy, true))
+    return SelectHvxVAlign(N);
+
+  const SDLoc &dl(N);
+  unsigned VecLen = ResTy.getSizeInBits();
+  if (VecLen == 32) {
+    SDValue Ops[] = {
+      CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
+      N->getOperand(0),
+      CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
+      N->getOperand(1),
+      CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
+    };
+    SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
+                                       MVT::i64, Ops);
+
+    // Shift right by "(Addr & 0x3) * 8" bytes.
+    SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
+    SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
+    SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
+                                       M0, N->getOperand(2), M1);
+    SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
+                                       SDValue(R, 0), SDValue(C, 0));
+    SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
+                                               SDValue(S, 0));
+    ReplaceNode(N, E.getNode());
+  } else {
+    assert(VecLen == 64);
+    SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
+                                        N->getOperand(2));
+    SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
+                                        N->getOperand(0), N->getOperand(1),
+                                        SDValue(Pu,0));
+    ReplaceNode(N, VA);
+  }
+}
+
+void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) {
+  const SDLoc &dl(N);
+  SDValue A = N->getOperand(1);
+  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
+  assert(isPowerOf2_32(-Mask));
+
+  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
+  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
+                                      N->getOperand(0), M);
+  ReplaceNode(N, AA);
+}
+
 // Handle these nodes here to avoid having to write patterns for all
 // combinations of input/output types. In all cases, the resulting
 // instruction is the same.
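For the 32-bit case, the selected sequence is a funnel shift of the two loaded
words by the byte offset of the original address: REG_SEQUENCE pairs the words,
S4_andi_asl_ri computes (Addr << 3) & 0x18, i.e. (Addr & 3) * 8, S2_lsr_r_p
shifts the 64-bit pair right, and isub_lo keeps the low word. A minimal
standalone scalar model of that sequence (illustrative names, not part of the
patch):

#include <cstdint>
#include <cstdio>

// Hi/Lo are the words loaded from the two adjacent aligned addresses;
// Addr is the original (possibly unaligned) address.
uint32_t valign32(uint32_t Hi, uint32_t Lo, uint32_t Addr) {
  uint64_t Pair = (uint64_t(Hi) << 32) | Lo;  // REG_SEQUENCE
  unsigned Shift = (Addr << 3) & 0x18;        // S4_andi_asl_ri: (Addr & 3) * 8
  return uint32_t(Pair >> Shift);             // S2_lsr_r_p + isub_lo
}

int main() {
  // Four bytes starting at byte offset 1 of two adjacent aligned words.
  std::printf("%08x\n", (unsigned)valign32(0x44332211, 0xddccbbaa, 1));
  // Prints 11ddccbb on a little-endian layout.
}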
@@ -721,6 +781,8 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
   case ISD::STORE:              return SelectStore(N);
   case ISD::INTRINSIC_W_CHAIN:  return SelectIntrinsicWChain(N);
   case ISD::INTRINSIC_WO_CHAIN: return SelectIntrinsicWOChain(N);
+  case HexagonISD::VALIGN:      return SelectVAlign(N);
+  case HexagonISD::VALIGNADDR:  return SelectVAlignAddr(N);
   case HexagonISD::TYPECAST:    return SelectTypecast(N);
   case HexagonISD::P2D:         return SelectP2D(N);
   case HexagonISD::D2P:         return SelectD2P(N);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
index dd2c6f4fc95..e031b08f671 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h
@@ -104,6 +104,8 @@ public:
   void SelectV65Gather(SDNode *N);
   void SelectV65GatherPred(SDNode *N);
   void SelectHVXDualOutput(SDNode *N);
+  void SelectVAlign(SDNode *N);
+  void SelectVAlignAddr(SDNode *N);
   void SelectTypecast(SDNode *N);
   void SelectP2D(SDNode *N);
   void SelectD2P(SDNode *N);
@@ -127,6 +129,7 @@
   void SelectHvxShuffle(SDNode *N);
   void SelectHvxRor(SDNode *N);
+  void SelectHvxVAlign(SDNode *N);
 
   bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);
   bool isAlignedMemNode(const MemSDNode *N) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 9b58e00cbe0..46f5bb4de8a 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -820,6 +820,7 @@ namespace llvm {
   void selectShuffle(SDNode *N);
   void selectRor(SDNode *N);
+  void selectVAlign(SDNode *N);
 
 private:
   void materialize(const ResultStack &Results);
@@ -2011,6 +2012,16 @@ void HvxSelector::selectRor(SDNode *N) {
   DAG.RemoveDeadNode(N);
 }
 
+void HvxSelector::selectVAlign(SDNode *N) {
+  SDValue Vv = N->getOperand(0);
+  SDValue Vu = N->getOperand(1);
+  SDValue Rt = N->getOperand(2);
+  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
+                                    N->getValueType(0), {Vv, Vu, Rt});
+  ISel.ReplaceNode(N, NewN);
+  DAG.RemoveDeadNode(N);
+}
+
 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
   HvxSelector(*this, *CurDAG).selectShuffle(N);
 }
@@ -2019,6 +2030,10 @@ void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
   HvxSelector(*this, *CurDAG).selectRor(N);
 }
 
+void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
+  HvxSelector(*this, *CurDAG).selectVAlign(N);
+}
+
 void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {
   const SDLoc &dl(N);
   SDValue Chain = N->getOperand(0);
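For HVX-sized vectors the node maps directly onto V6_valignb, which selects a
full vector of bytes out of the concatenation of two vector registers. A
hedged byte-level model of the operation as the expansion uses it (a sketch:
operand roles follow the selectVAlign code above, assuming Rt carries the low
bits of the original unaligned address; function names are illustrative):

#include <cstddef>
#include <cstdint>
#include <vector>

// Lo holds the bytes at the aligned-down address, Hi the next aligned block.
// The result is HwLen bytes starting at offset (Rt % HwLen) of Lo:Hi.
std::vector<uint8_t> valignb(const std::vector<uint8_t> &Hi,
                             const std::vector<uint8_t> &Lo, uint32_t Rt) {
  size_t HwLen = Lo.size();
  std::vector<uint8_t> Out(HwLen);
  for (size_t i = 0; i != HwLen; ++i) {
    size_t Idx = (Rt % HwLen) + i;
    Out[i] = Idx < HwLen ? Lo[Idx] : Hi[Idx - HwLen];
  }
  return Out;
}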
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index b7bc2627596..69f5e1ffe52 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -103,6 +103,10 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
   cl::Hidden, cl::ZeroOrMore, cl::init(4),
   cl::desc("Max #stores to inline memset"));
 
+static cl::opt<bool> AlignLoads("hexagon-align-loads",
+  cl::Hidden, cl::init(false),
+  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
+
 
 namespace {
 
@@ -544,8 +548,9 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   EVT VT = LSN->getMemoryVT();
   if (!VT.isSimple())
     return false;
-  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 ||
-                     VT == MVT::i32 || VT == MVT::i64 ||
+  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+                     VT == MVT::i64 || VT == MVT::v2i16 || VT == MVT::v2i32 ||
+                     VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v8i8 ||
                      Subtarget.isHVXVectorType(VT.getSimpleVT());
   if (!IsLegalType)
     return false;
@@ -1495,6 +1500,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::XOR, NativeVT, Legal);
   }
 
+  // Custom lower unaligned loads.
+  for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+                    MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+    setOperationAction(ISD::LOAD, VecVT, Custom);
+  }
+
   // Custom-lower bitcasts from i8 to v8i1.
   setOperationAction(ISD::BITCAST, MVT::i8, Custom);
   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
@@ -1559,7 +1570,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   // Handling of indexed loads/stores: default is "expand".
   //
-  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::v2i16,
+                 MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
   }
@@ -1718,6 +1730,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
+  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
   case HexagonISD::OP_END:        break;
   }
@@ -2519,6 +2532,90 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
 }
 
 SDValue
+HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
+      const {
+  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+  unsigned HaveAlign = LN->getAlignment();
+  MVT LoadTy = ty(Op);
+  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+  if (HaveAlign >= NeedAlign)
+    return Op;
+
+  // Indexed loads/stores are created after legalizing operations, so we
+  // shouldn't be getting unaligned post-incrementing loads at this point.
+  assert(LN->isUnindexed() && "Expecting only unindexed loads");
+
+  const SDLoc &dl(Op);
+  const DataLayout &DL = DAG.getDataLayout();
+  LLVMContext &Ctx = *DAG.getContext();
+  unsigned AS = LN->getAddressSpace();
+
+  // If the load aligning is disabled or the load can be broken up into two
+  // smaller legal loads, do the default (target-independent) expansion.
+  bool DoDefault = false;
+  if (!AlignLoads) {
+    if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
+      return Op;
+    DoDefault = true;
+  }
+  if (!DoDefault && 2*HaveAlign == NeedAlign) {
+    // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
+    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
+                                : MVT::getVectorVT(MVT::i8, HaveAlign);
+    DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
+  }
+  if (DoDefault) {
+    std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
+    return DAG.getMergeValues({P.first, P.second}, dl);
+  }
+
+  // The code below generates two loads, both aligned as NeedAlign, and
+  // with the distance of NeedAlign between them. For that to cover the
+  // bits that need to be loaded (and without overlapping), the size of
+  // the loads should be equal to NeedAlign. This is true for all loadable
+  // types, but add an assertion in case something changes in the future.
+  assert(LoadTy.getSizeInBits() == 8*NeedAlign);
+
+  unsigned LoadLen = NeedAlign;
+  SDValue Base = LN->getBasePtr();
+  SDValue Chain = LN->getChain();
+  auto BO = getBaseAndOffset(Base);
+  unsigned BaseOpc = BO.first.getOpcode();
+  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
+    return Op;
+
+  if (BO.second % LoadLen != 0) {
+    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
+                           DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
+    BO.second -= BO.second % LoadLen;
+  }
+  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
+      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
+                    DAG.getConstant(NeedAlign, dl, MVT::i32))
+      : BO.first;
+  SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
+  SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
+
+  MachineMemOperand *WideMMO = nullptr;
+  if (MachineMemOperand *MMO = LN->getMemOperand()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
+                    2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
+                    MMO->getSyncScopeID(), MMO->getOrdering(),
+                    MMO->getFailureOrdering());
+  }
+
+  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
+  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
+
+  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
+                                {Load1, Load0, BaseNoOff.getOperand(0)});
+  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                                 Load0.getValue(1), Load1.getValue(1));
+  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
+  return M;
+}
+
+SDValue
 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Op.getOperand(0);
   SDValue Offset = Op.getOperand(1);
@@ -2553,8 +2650,11 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   if (Opc == ISD::INLINEASM)
     return LowerINLINEASM(Op, DAG);
 
-  if (isHvxOperation(Op))
-    return LowerHvxOperation(Op, DAG);
+  if (isHvxOperation(Op)) {
+    // If HVX lowering returns nothing, try the default lowering.
+    if (SDValue V = LowerHvxOperation(Op, DAG))
+      return V;
+  }
 
   switch (Opc) {
     default:
@@ -2572,6 +2672,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
+    case ISD::LOAD:                 return LowerUnalignedLoad(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                   return LowerVECTOR_SHIFT(Op, DAG);
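The overall expansion is easiest to see on a plain byte buffer: round the
address down (VALIGNADDR), issue two naturally aligned loads, and funnel the
pair right by the low address bits (VALIGN). A hedged C++ sketch of the 8-byte
case, assuming a little-endian layout and that both aligned windows stay
inside valid memory (which the real expansion guarantees by construction):

#include <cstdint>
#include <cstring>

uint64_t loadUnaligned64(const uint8_t *Addr) {
  uintptr_t A = reinterpret_cast<uintptr_t>(Addr);
  unsigned Off = A & 7;               // low bits of the original address
  const uint8_t *Base = Addr - Off;   // VALIGNADDR: align the base downward
  uint64_t Load0;
  std::memcpy(&Load0, Base, 8);       // first aligned load
  if (Off == 0)
    return Load0;                     // already aligned: one load suffices
  uint64_t Load1;
  std::memcpy(&Load1, Base + 8, 8);   // second aligned load
  // VALIGN: extract 8 bytes at offset Off from the pair Load1:Load0.
  return (Load0 >> (8 * Off)) | (Load1 << (8 * (8 - Off)));
}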
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8ba08564547..607d8528f63 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -79,7 +79,9 @@ namespace HexagonISD {
       VZERO,
       TYPECAST,    // No-op that's used to convert between different legal
                    // types in a register.
-      VALIGNADDR,  // Align vector address: Op & -HwLen, except when it is
+      VALIGN,      // Align two vectors (in Op0, Op1) to one that would have
+                   // been loaded from address in Op2.
+      VALIGNADDR,  // Align vector address: Op0 & -Op1, except when it is
                    // an address in a vector load, then it's a no-op.
       OP_END
     };
@@ -153,6 +155,7 @@ namespace HexagonISD {
   SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -418,7 +421,6 @@ namespace HexagonISD {
   SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
-  SDValue LowerHvxUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index b9e285131b3..58fd740d7a8 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -14,10 +14,6 @@
 
 using namespace llvm;
 
-static cl::opt<bool> ExpandUnalignedLoads("hvx-expand-unaligned-loads",
-  cl::Hidden, cl::init(true),
-  cl::desc("Expand unaligned HVX loads into a pair of aligned loads"));
-
 static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16, MVT::v16i32 };
 static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
 static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
@@ -1296,59 +1292,6 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue
-HexagonTargetLowering::LowerHvxUnalignedLoad(SDValue Op, SelectionDAG &DAG)
-      const {
-  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
-  unsigned HaveAlign = LN->getAlignment();
-  MVT VecTy = ty(Op);
-  Type *Ty = EVT(VecTy).getTypeForEVT(*DAG.getContext());
-  const DataLayout &DL = DAG.getDataLayout();
-  unsigned NeedAlign = DL.getABITypeAlignment(Ty);
-  if (HaveAlign >= NeedAlign || !ExpandUnalignedLoads)
-    return Op;
-
-  unsigned HwLen = Subtarget.getVectorLength();
-
-  SDValue Base = LN->getBasePtr();
-  SDValue Chain = LN->getChain();
-  auto BO = getBaseAndOffset(Base);
-  unsigned BaseOpc = BO.first.getOpcode();
-  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % HwLen == 0)
-    return Op;
-
-  const SDLoc &dl(Op);
-  if (BO.second % HwLen != 0) {
-    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
-                           DAG.getConstant(BO.second % HwLen, dl, MVT::i32));
-    BO.second -= BO.second % HwLen;
-  }
-  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
-      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first)
-      : BO.first;
-  SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
-  SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+HwLen, dl);
-
-  MachineMemOperand *WideMMO = nullptr;
-  if (MachineMemOperand *MMO = LN->getMemOperand()) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
-                    2*HwLen, HwLen, MMO->getAAInfo(), MMO->getRanges(),
-                    MMO->getSyncScopeID(), MMO->getOrdering(),
-                    MMO->getFailureOrdering());
-  }
-
-  SDValue Load0 = DAG.getLoad(VecTy, dl, Chain, Base0, WideMMO);
-  SDValue Load1 = DAG.getLoad(VecTy, dl, Chain, Base1, WideMMO);
-
-  SDValue Aligned = getInstr(Hexagon::V6_valignb, dl, VecTy,
-                             {Load1, Load0, BaseNoOff.getOperand(0)}, DAG);
-  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
-                                 Load0.getValue(1), Load1.getValue(1));
-  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
-  return M;
-}
-
-SDValue
 HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
   assert(!Op.isMachineOpcode());
   SmallVector<SDValue,2> OpsL, OpsH;
@@ -1465,7 +1408,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
   case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
-  case ISD::LOAD:                    return LowerHvxUnalignedLoad(Op, DAG);
   case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
   case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
   case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
@@ -1478,6 +1420,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
   case ISD::SETCC:
   case ISD::INTRINSIC_VOID:          return Op;
+  // Unaligned loads will be handled by the default lowering.
+  case ISD::LOAD:                    return SDValue();
   }
 #ifndef NDEBUG
   Op.dumpr(&DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 99436aae201..105b6caf98f 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2522,6 +2522,11 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, int Offset) const {
     case MVT::i16:
     case MVT::i32:
     case MVT::i64:
+    case MVT::v2i16:
+    case MVT::v2i32:
+    case MVT::v4i8:
+    case MVT::v4i16:
+    case MVT::v8i8:
       return isInt<4>(Count);
     // For HVX vectors the auto-inc is s3
     case MVT::v64i8:
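The newly listed vector types reuse the scalar post-increment encodings, where
the increment is a signed 4-bit count of element-sized steps (e.g. #s4:3 for
64-bit accesses, giving byte offsets -64..56 in multiples of 8). A small model
of the check (isInt comes from llvm/Support/MathExtras.h; this helper is
illustrative, not the patch's code):

#include <cstdint>

// True if X fits in a signed N-bit field, mirroring llvm::isInt<N>.
template <unsigned N> bool fitsSigned(int64_t X) {
  return -(INT64_C(1) << (N - 1)) <= X && X < (INT64_C(1) << (N - 1));
}

bool isValidAutoInc(int AccessSizeBytes, int OffsetBytes) {
  if (OffsetBytes % AccessSizeBytes != 0)
    return false;                                  // must be scaled
  return fitsSigned<4>(OffsetBytes / AccessSizeBytes); // s4 count field
}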
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index 46bdafd228f..e0cff2ac238 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -100,6 +100,17 @@ def HWI8:  PatLeaf<(VecPI8  HvxWR:$R)>;
 def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
 def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
 
+def SDTVecVecIntOp:
+  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
+                       SDTCisVT<3,i32>]>;
+
+def HexagonVALIGN:     SDNode<"HexagonISD::VALIGN",     SDTVecVecIntOp>;
+def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
+
+def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
+                    (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
+def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
+
 // Pattern fragments to extract the low and high subregisters from a
 // 64-bit value.
 def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
@@ -1829,7 +1840,12 @@ let AddedComplexity = 20 in {
   defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>;
   defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>;
   defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>;
+  defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>;
+  defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>;
   defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>;
+  defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>;
   defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>;
   defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>;
   // No sextloadi1.
@@ -1867,10 +1883,15 @@ let AddedComplexity = 60 in {
   def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>;
   def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;
   def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>;
-  def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
-  def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
   def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>;
   def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>;
+  def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>;
+  def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>;
 
   def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>;
   def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>;
@@ -1884,29 +1905,39 @@ let AddedComplexity = 60 in {
 }
 
 let AddedComplexity = 40 in {
-  def: Loadxr_shl_pat<extloadi8,   i32, L4_loadrub_rr>;
-  def: Loadxr_shl_pat<zextloadi8,  i32, L4_loadrub_rr>;
-  def: Loadxr_shl_pat<sextloadi8,  i32, L4_loadrb_rr>;
-  def: Loadxr_shl_pat<extloadi16,  i32, L4_loadruh_rr>;
-  def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>;
-  def: Loadxr_shl_pat<load,        i32, L4_loadri_rr>;
-  def: Loadxr_shl_pat<load,        i64, L4_loadrd_rr>;
-  def: Loadxr_shl_pat<load,        f32, L4_loadri_rr>;
-  def: Loadxr_shl_pat<load,        f64, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<extloadi8,   i32,   L4_loadrub_rr>;
+  def: Loadxr_shl_pat<zextloadi8,  i32,   L4_loadrub_rr>;
+  def: Loadxr_shl_pat<sextloadi8,  i32,   L4_loadrb_rr>;
+  def: Loadxr_shl_pat<extloadi16,  i32,   L4_loadruh_rr>;
+  def: Loadxr_shl_pat<zextloadi16, i32,   L4_loadruh_rr>;
+  def: Loadxr_shl_pat<sextloadi16, i32,   L4_loadrh_rr>;
+  def: Loadxr_shl_pat<load,        i32,   L4_loadri_rr>;
+  def: Loadxr_shl_pat<load,        v2i16, L4_loadri_rr>;
+  def: Loadxr_shl_pat<load,        v4i8,  L4_loadri_rr>;
+  def: Loadxr_shl_pat<load,        i64,   L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load,        v2i32, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load,        v4i16, L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load,        v8i8,  L4_loadrd_rr>;
+  def: Loadxr_shl_pat<load,        f32,   L4_loadri_rr>;
+  def: Loadxr_shl_pat<load,        f64,   L4_loadrd_rr>;
 }
 
 let AddedComplexity = 20 in {
-  def: Loadxr_add_pat<extloadi8,   i32, L4_loadrub_rr>;
-  def: Loadxr_add_pat<zextloadi8,  i32, L4_loadrub_rr>;
-  def: Loadxr_add_pat<sextloadi8,  i32, L4_loadrb_rr>;
-  def: Loadxr_add_pat<extloadi16,  i32, L4_loadruh_rr>;
-  def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>;
-  def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>;
-  def: Loadxr_add_pat<load,        i32, L4_loadri_rr>;
-  def: Loadxr_add_pat<load,        i64, L4_loadrd_rr>;
-  def: Loadxr_add_pat<load,        f32, L4_loadri_rr>;
-  def: Loadxr_add_pat<load,        f64, L4_loadrd_rr>;
+  def: Loadxr_add_pat<extloadi8,   i32,   L4_loadrub_rr>;
+  def: Loadxr_add_pat<zextloadi8,  i32,   L4_loadrub_rr>;
+  def: Loadxr_add_pat<sextloadi8,  i32,   L4_loadrb_rr>;
+  def: Loadxr_add_pat<extloadi16,  i32,   L4_loadruh_rr>;
+  def: Loadxr_add_pat<zextloadi16, i32,   L4_loadruh_rr>;
+  def: Loadxr_add_pat<sextloadi16, i32,   L4_loadrh_rr>;
+  def: Loadxr_add_pat<load,        i32,   L4_loadri_rr>;
+  def: Loadxr_add_pat<load,        v2i16, L4_loadri_rr>;
+  def: Loadxr_add_pat<load,        v4i8,  L4_loadri_rr>;
+  def: Loadxr_add_pat<load,        i64,   L4_loadrd_rr>;
+  def: Loadxr_add_pat<load,        v2i32, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load,        v4i16, L4_loadrd_rr>;
+  def: Loadxr_add_pat<load,        v8i8,  L4_loadrd_rr>;
+  def: Loadxr_add_pat<load,        f32,   L4_loadri_rr>;
+  def: Loadxr_add_pat<load,        f64,   L4_loadrd_rr>;
 }
 
 let AddedComplexity = 40 in {
@@ -1936,17 +1967,22 @@ let AddedComplexity = 20 in {
 
 // Absolute address
 let AddedComplexity = 60 in {
-  def: Loada_pat<zextloadi1,  i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<sextloadi8,  i32, anyimm0, PS_loadrbabs>;
-  def: Loada_pat<extloadi8,   i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<zextloadi8,  i32, anyimm0, PS_loadrubabs>;
-  def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>;
-  def: Loada_pat<extloadi16,  i32, anyimm1, PS_loadruhabs>;
-  def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>;
-  def: Loada_pat<load,        i32, anyimm2, PS_loadriabs>;
-  def: Loada_pat<load,        i64, anyimm3, PS_loadrdabs>;
-  def: Loada_pat<load,        f32, anyimm2, PS_loadriabs>;
-  def: Loada_pat<load,        f64, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<zextloadi1,  i32,   anyimm0, PS_loadrubabs>;
+  def: Loada_pat<sextloadi8,  i32,   anyimm0, PS_loadrbabs>;
+  def: Loada_pat<extloadi8,   i32,   anyimm0, PS_loadrubabs>;
+  def: Loada_pat<zextloadi8,  i32,   anyimm0, PS_loadrubabs>;
+  def: Loada_pat<sextloadi16, i32,   anyimm1, PS_loadrhabs>;
+  def: Loada_pat<extloadi16,  i32,   anyimm1, PS_loadruhabs>;
+  def: Loada_pat<zextloadi16, i32,   anyimm1, PS_loadruhabs>;
+  def: Loada_pat<load,        i32,   anyimm2, PS_loadriabs>;
+  def: Loada_pat<load,        v2i16, anyimm2, PS_loadriabs>;
+  def: Loada_pat<load,        v4i8,  anyimm2, PS_loadriabs>;
+  def: Loada_pat<load,        i64,   anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load,        v2i32, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load,        v4i16, anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load,        v8i8,  anyimm3, PS_loadrdabs>;
+  def: Loada_pat<load,        f32,   anyimm2, PS_loadriabs>;
+  def: Loada_pat<load,        f64,   anyimm3, PS_loadrdabs>;
 
   def: Loada_pat<atomic_load_8,  i32, anyimm0, PS_loadrubabs>;
   def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>;
@@ -1972,18 +2008,23 @@ let AddedComplexity = 30 in {
 
 // GP-relative address
 let AddedComplexity = 100 in {
-  def: Loada_pat<extloadi1,   i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<zextloadi1,  i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<extloadi8,   i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<sextloadi8,  i32, addrgp, L2_loadrbgp>;
-  def: Loada_pat<zextloadi8,  i32, addrgp, L2_loadrubgp>;
-  def: Loada_pat<extloadi16,  i32, addrgp, L2_loadruhgp>;
-  def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>;
-  def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>;
-  def: Loada_pat<load,        i32, addrgp, L2_loadrigp>;
-  def: Loada_pat<load,        i64, addrgp, L2_loadrdgp>;
-  def: Loada_pat<load,        f32, addrgp, L2_loadrigp>;
-  def: Loada_pat<load,        f64, addrgp, L2_loadrdgp>;
+  def: Loada_pat<extloadi1,   i32,   addrgp, L2_loadrubgp>;
+  def: Loada_pat<zextloadi1,  i32,   addrgp, L2_loadrubgp>;
+  def: Loada_pat<extloadi8,   i32,   addrgp, L2_loadrubgp>;
+  def: Loada_pat<sextloadi8,  i32,   addrgp, L2_loadrbgp>;
+  def: Loada_pat<zextloadi8,  i32,   addrgp, L2_loadrubgp>;
+  def: Loada_pat<extloadi16,  i32,   addrgp, L2_loadruhgp>;
+  def: Loada_pat<sextloadi16, i32,   addrgp, L2_loadrhgp>;
+  def: Loada_pat<zextloadi16, i32,   addrgp, L2_loadruhgp>;
+  def: Loada_pat<load,        i32,   addrgp, L2_loadrigp>;
+  def: Loada_pat<load,        v2i16, addrgp, L2_loadrigp>;
+  def: Loada_pat<load,        v4i8,  addrgp, L2_loadrigp>;
+  def: Loada_pat<load,        i64,   addrgp, L2_loadrdgp>;
+  def: Loada_pat<load,        v2i32, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load,        v4i16, addrgp, L2_loadrdgp>;
+  def: Loada_pat<load,        v8i8,  addrgp, L2_loadrdgp>;
+  def: Loada_pat<load,        f32,   addrgp, L2_loadrigp>;
+  def: Loada_pat<load,        f64,   addrgp, L2_loadrdgp>;
 
   def: Loada_pat<atomic_load_8,  i32, addrgp, L2_loadrubgp>;
   def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index a3dc8d3dabc..510c3cf83be 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -16,12 +16,6 @@ def HwLen2: SDNodeXForm<imm, [{
   return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);
 }]>;
 
-def NHwLen: SDNodeXForm<imm, [{
-  const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget());
-  return CurDAG->getTargetConstant(-int(ST.getVectorLength()), SDLoc(N),
-                                   MVT::i32);
-}]>;
-
 def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi -1))>;
 
 def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt),
@@ -42,7 +36,6 @@ def HexagonVZERO: SDNode<"HexagonISD::VZERO", SDTVecLeaf>;
 def HexagonQCAT:   SDNode<"HexagonISD::QCAT",   SDTVecBinOp>;
 def HexagonQTRUE:  SDNode<"HexagonISD::QTRUE",  SDTVecLeaf>;
 def HexagonQFALSE: SDNode<"HexagonISD::QFALSE", SDTVecLeaf>;
-def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
 
 def vzero:  PatFrag<(ops), (HexagonVZERO)>;
 def qtrue:  PatFrag<(ops), (HexagonQTRUE)>;
@@ -51,7 +44,6 @@ def qcat: PatFrag<(ops node:$Qs, node:$Qt), (HexagonQCAT node:$Qs, node:$Qt)>;
 
 def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>;
-def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
 
 def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb $Vs)>;
 def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh $Vs)>;
@@ -175,8 +167,6 @@ let Predicates = [UseHVX] in {
   def: Pat<(VecPI16 vzero), (Combinev (V6_vd0), (V6_vd0))>;
   def: Pat<(VecPI32 vzero), (Combinev (V6_vd0), (V6_vd0))>;
 
-  def: Pat<(valignaddr I32:$Rs), (A2_andir I32:$Rs, (NHwLen (i32 0)))>;
-
   def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),
            (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
   def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)),
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index c1e46e3f208..87121b30906 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -241,6 +241,12 @@ public:
     return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; });
   }
 
+  unsigned getTypeAlignment(MVT Ty) const {
+    if (isHVXVectorType(Ty, true))
+      return getVectorLength();
+    return Ty.getSizeInBits() / 8;
+  }
+
   unsigned getL1CacheLineSize() const;
   unsigned getL1PrefetchDistance() const;
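The new getTypeAlignment helper supplies the NeedAlign value that
LowerUnalignedLoad compares against. A scalar mirror of its logic (a sketch;
the HVX vector length is 64 or 128 bytes depending on the configured mode):

// For non-HVX types the required alignment equals the type's size in bytes
// (v4i8 -> 4, v2i32/v4i16/v8i8 -> 8); for HVX types it is the full vector
// register length.
unsigned typeAlignmentBytes(unsigned TypeSizeBits, bool IsHvxType,
                            unsigned VectorLengthBytes) {
  return IsHvxType ? VectorLengthBytes : TypeSizeBits / 8;
}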
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll b/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll
index 5494bd84fcc..ca1c1747013 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -disable-packetizer < %s | FileCheck %s
+; RUN: llc -march=hexagon -disable-packetizer -hexagon-align-loads < %s | FileCheck %s
 
 ; CHECK-LABEL: test_00:
 ; CHECK-DAG: v[[V00:[0-9]+]] = vmem(r[[B00:[0-9]+]]+#0)
diff --git a/llvm/test/CodeGen/Hexagon/select-instr-align.ll b/llvm/test/CodeGen/Hexagon/select-instr-align.ll
index 9d8939282c6..1021f924f1d 100644
--- a/llvm/test/CodeGen/Hexagon/select-instr-align.ll
+++ b/llvm/test/CodeGen/Hexagon/select-instr-align.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -hvx-expand-unaligned-loads=0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -hexagon-align-loads=0 < %s | FileCheck %s
 
 ; CHECK-LABEL: aligned_load:
 ; CHECK: = vmem({{.*}})
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
index 32abb75f20f..546ffdd66ff 100644
--- a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=hexagon -O0 < %s | FileCheck %s
+; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s
 
 ; CHECK-LABEL: danny:
 ; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0)
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll
index f49a1e24a1b..83d56ed9ed4 100644
--- a/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll
@@ -1,9 +1,7 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s
 
 ; Check that store is post-incremented.
-; CHECK: memuh(r{{[0-9]+}}+#6)
-; CHECK: combine(r{{[0-9]+}},r{{[0-9]+}})
-; CHECK: vaddh
+; CHECK: memh(r{{[0-9]+}}++#2)
 
 target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
 target triple = "hexagon"
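Note that the AlignLoads option defaults to false, so existing scalar tests
keep their previous output unless the flag is passed explicitly, as the
updated RUN lines above show. To exercise the new expansion on a standalone
input (a usage sketch; the file name is hypothetical):

  llc -march=hexagon -hexagon-align-loads unaligned-load.ll -o -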