diff options
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 64 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.cpp | 111 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLowering.h | 6 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp | 60 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatterns.td | 131 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonPatternsHVX.td | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/Hexagon/HexagonSubtarget.h | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/select-instr-align.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll | 4 | 
14 files changed, 294 insertions, 127 deletions
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp index 54e501a6638..3540cf06b9c 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp @@ -93,12 +93,16 @@ void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {        Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;      break;    case MVT::i32: +  case MVT::v2i16: +  case MVT::v4i8:      Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;      break;    case MVT::i64: +  case MVT::v2i32: +  case MVT::v4i16: +  case MVT::v8i8:      Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;      break; -  // 64B    case MVT::v64i8:    case MVT::v32i16:    case MVT::v16i32: @@ -377,9 +381,14 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {      Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;      break;    case MVT::i32: +  case MVT::v2i16: +  case MVT::v4i8:      Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;      break;    case MVT::i64: +  case MVT::v2i32: +  case MVT::v4i16: +  case MVT::v8i8:      Opcode = IsValidInc ? 
Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;      break;    case MVT::v64i8: @@ -657,6 +666,57 @@ void HexagonDAGToDAGISel::SelectBitcast(SDNode *N) {    CurDAG->RemoveDeadNode(N);  } +void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) { +  MVT ResTy = N->getValueType(0).getSimpleVT(); +  if (HST->isHVXVectorType(ResTy, true)) +    return SelectHvxVAlign(N); + +  const SDLoc &dl(N); +  unsigned VecLen = ResTy.getSizeInBits(); +  if (VecLen == 32) { +    SDValue Ops[] = { +      CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32), +      N->getOperand(0), +      CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32), +      N->getOperand(1), +      CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32) +    }; +    SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, +                                       MVT::i64, Ops); + +    // Shift right by "(Addr & 0x3) * 8" bits. +    SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32); +    SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32); +    SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32, +                                       M0, N->getOperand(2), M1); +    SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64, +                                       SDValue(R, 0), SDValue(C, 0)); +    SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy, +                                               SDValue(S, 0)); +    ReplaceNode(N, E.getNode()); +  } else { +    assert(VecLen == 64); +    SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1, +                                        N->getOperand(2)); +    SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy, +                                        N->getOperand(0), N->getOperand(1), +                                        SDValue(Pu,0)); +    ReplaceNode(N, VA); +  } +} + +void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) { +  
const SDLoc &dl(N); +  SDValue A = N->getOperand(1); +  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue(); +  assert(isPowerOf2_32(-Mask)); + +  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32); +  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32, +                                      N->getOperand(0), M); +  ReplaceNode(N, AA); +} +  // Handle these nodes here to avoid having to write patterns for all  // combinations of input/output types. In all cases, the resulting  // instruction is the same. @@ -721,6 +781,8 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {    case ISD::STORE:                return SelectStore(N);    case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);    case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N); +  case HexagonISD::VALIGN:        return SelectVAlign(N); +  case HexagonISD::VALIGNADDR:    return SelectVAlignAddr(N);    case HexagonISD::TYPECAST:      return SelectTypecast(N);    case HexagonISD::P2D:           return SelectP2D(N);    case HexagonISD::D2P:           return SelectD2P(N); diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h index dd2c6f4fc95..e031b08f671 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.h @@ -104,6 +104,8 @@ public:    void SelectV65Gather(SDNode *N);    void SelectV65GatherPred(SDNode *N);    void SelectHVXDualOutput(SDNode *N); +  void SelectVAlign(SDNode *N); +  void SelectVAlignAddr(SDNode *N);    void SelectTypecast(SDNode *N);    void SelectP2D(SDNode *N);    void SelectD2P(SDNode *N); @@ -127,6 +129,7 @@ private:    void SelectHvxShuffle(SDNode *N);    void SelectHvxRor(SDNode *N); +  void SelectHvxVAlign(SDNode *N);    bool keepsLowBits(const SDValue &Val, unsigned NumBits, SDValue &Src);    bool isAlignedMemNode(const MemSDNode *N) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp 
b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp index 9b58e00cbe0..46f5bb4de8a 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp @@ -820,6 +820,7 @@ namespace llvm {      void selectShuffle(SDNode *N);      void selectRor(SDNode *N); +    void selectVAlign(SDNode *N);    private:      void materialize(const ResultStack &Results); @@ -2011,6 +2012,16 @@ void HvxSelector::selectRor(SDNode *N) {    DAG.RemoveDeadNode(N);  } +void HvxSelector::selectVAlign(SDNode *N) { +  SDValue Vv = N->getOperand(0); +  SDValue Vu = N->getOperand(1); +  SDValue Rt = N->getOperand(2); +  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N), +                                    N->getValueType(0), {Vv, Vu, Rt}); +  ISel.ReplaceNode(N, NewN); +  DAG.RemoveDeadNode(N); +} +  void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {    HvxSelector(*this, *CurDAG).selectShuffle(N);  } @@ -2019,6 +2030,10 @@ void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {    HvxSelector(*this, *CurDAG).selectRor(N);  } +void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) { +  HvxSelector(*this, *CurDAG).selectVAlign(N); +} +  void HexagonDAGToDAGISel::SelectV65GatherPred(SDNode *N) {    const SDLoc &dl(N);    SDValue Chain = N->getOperand(0); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index b7bc2627596..69f5e1ffe52 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -103,6 +103,10 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",    cl::Hidden, cl::ZeroOrMore, cl::init(4),    cl::desc("Max #stores to inline memset")); +static cl::opt<bool> AlignLoads("hexagon-align-loads", +  cl::Hidden, cl::init(false), +  cl::desc("Rewrite unaligned loads as a pair of aligned loads")); +  namespace { @@ -544,8 +548,9 @@ bool 
HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,    EVT VT = LSN->getMemoryVT();    if (!VT.isSimple())      return false; -  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || -                     VT == MVT::i32 || VT == MVT::i64 || +  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || +                     VT == MVT::i64 || VT == MVT::v2i16 || VT == MVT::v2i32 || +                     VT == MVT::v4i8 || VT == MVT::v4i16 || VT == MVT::v8i8 ||                       Subtarget.isHVXVectorType(VT.getSimpleVT());    if (!IsLegalType)      return false; @@ -1495,6 +1500,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,      setOperationAction(ISD::XOR, NativeVT, Legal);    } +  // Custom lower unaligned loads. +  for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8, +                    MVT::v2i16, MVT::v4i16, MVT::v2i32}) { +    setOperationAction(ISD::LOAD, VecVT, Custom); +  } +    // Custom-lower bitcasts from i8 to v8i1.    setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);    setOperationAction(ISD::SETCC,          MVT::v2i16, Custom); @@ -1559,7 +1570,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,    // Handling of indexed loads/stores: default is "expand".    
// -  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { +  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::v2i16, +                 MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {      setIndexedLoadAction(ISD::POST_INC, VT, Legal);      setIndexedStoreAction(ISD::POST_INC, VT, Legal);    } @@ -1718,6 +1730,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {    case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";    case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";    case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST"; +  case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";    case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";    case HexagonISD::OP_END:        break;    } @@ -2519,6 +2532,90 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {  }  SDValue +HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) +      const { +  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); +  unsigned HaveAlign = LN->getAlignment(); +  MVT LoadTy = ty(Op); +  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy); +  if (HaveAlign >= NeedAlign) +    return Op; +  // Indexed loads/stores are created after legalizing operations, so we +  // shouldn't be getting unaligned post-incrementing loads at this point. +  assert(LN->isUnindexed() && "Expecting only unindexed loads"); + +  const SDLoc &dl(Op); +  const DataLayout &DL = DAG.getDataLayout(); +  LLVMContext &Ctx = *DAG.getContext(); +  unsigned AS = LN->getAddressSpace(); + +  // If the load aligning is disabled or the load can be broken up into two +  // smaller legal loads, do the default (target-independent) expansion. 
+  bool DoDefault = false; +  if (!AlignLoads) { +    if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign)) +      return Op; +    DoDefault = true; +  } +  if (!DoDefault && 2*HaveAlign == NeedAlign) { +    // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)". +    MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign) +                                : MVT::getVectorVT(MVT::i8, HaveAlign); +    DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign); +  } +  if (DoDefault) { +    std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG); +    return DAG.getMergeValues({P.first, P.second}, dl); +  } + +  // The code below generates two loads, both aligned as NeedAlign, and +  // with the distance of NeedAlign between them. For that to cover the +  // bits that need to be loaded (and without overlapping), the size of +  // the loads should be equal to NeedAlign. This is true for all loadable +  // types, but add an assertion in case something changes in the future. +  assert(LoadTy.getSizeInBits() == 8*NeedAlign); + +  unsigned LoadLen = NeedAlign; +  SDValue Base = LN->getBasePtr(); +  SDValue Chain = LN->getChain(); +  auto BO = getBaseAndOffset(Base); +  unsigned BaseOpc = BO.first.getOpcode(); +  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0) +    return Op; + +  if (BO.second % LoadLen != 0) { +    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first, +                           DAG.getConstant(BO.second % LoadLen, dl, MVT::i32)); +    BO.second -= BO.second % LoadLen; +  } +  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR) +      ? 
DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first, +                    DAG.getConstant(NeedAlign, dl, MVT::i32)) +      : BO.first; +  SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl); +  SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl); + +  MachineMemOperand *WideMMO = nullptr; +  if (MachineMemOperand *MMO = LN->getMemOperand()) { +    MachineFunction &MF = DAG.getMachineFunction(); +    WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(), +                    2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(), +                    MMO->getSyncScopeID(), MMO->getOrdering(), +                    MMO->getFailureOrdering()); +  } + +  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO); +  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO); + +  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy, +                                {Load1, Load0, BaseNoOff.getOperand(0)}); +  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, +                                 Load0.getValue(1), Load1.getValue(1)); +  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl); +  return M; +} + +SDValue  HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {    SDValue Chain     = Op.getOperand(0);    SDValue Offset    = Op.getOperand(1); @@ -2553,8 +2650,11 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    if (Opc == ISD::INLINEASM)      return LowerINLINEASM(Op, DAG); -  if (isHvxOperation(Op)) -    return LowerHvxOperation(Op, DAG); +  if (isHvxOperation(Op)) { +    // If HVX lowering returns nothing, try the default lowering. 
+    if (SDValue V = LowerHvxOperation(Op, DAG)) +      return V; +  }    switch (Opc) {      default: @@ -2572,6 +2672,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {      case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);      case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);      case ISD::BITCAST:              return LowerBITCAST(Op, DAG); +    case ISD::LOAD:                 return LowerUnalignedLoad(Op, DAG);      case ISD::SRA:      case ISD::SHL:      case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 8ba08564547..607d8528f63 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -79,7 +79,9 @@ namespace HexagonISD {        VZERO,        TYPECAST,    // No-op that's used to convert between different legal                     // types in a register. -      VALIGNADDR,  // Align vector address: Op & -HwLen, except when it is +      VALIGN,      // Align two vectors (in Op0, Op1) to one that would have +                   // been loaded from address in Op2. +      VALIGNADDR,  // Align vector address: Op0 & -Op1, except when it is                     // an address in a vector load, then it's a no-op.        
OP_END      }; @@ -153,6 +155,7 @@ namespace HexagonISD {      SDValue LowerANY_EXTEND(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; +    SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; @@ -418,7 +421,6 @@ namespace HexagonISD {      SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;      SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const; -    SDValue LowerHvxUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;      SDValue SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const;      SDValue SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp index b9e285131b3..58fd740d7a8 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp @@ -14,10 +14,6 @@  using namespace llvm; -static cl::opt<bool> ExpandUnalignedLoads("hvx-expand-unaligned-loads", -  cl::Hidden, cl::init(true), -  cl::desc("Expand unaligned HVX loads into a pair of aligned loads")); -  static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16,  MVT::v16i32 };  static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };  static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16,  MVT::v32i32 }; @@ -1296,59 +1292,6 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {  }  SDValue -HexagonTargetLowering::LowerHvxUnalignedLoad(SDValue Op, SelectionDAG &DAG) -      const { -  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode()); -  unsigned HaveAlign = LN->getAlignment(); -  MVT VecTy = ty(Op); -  Type *Ty = 
EVT(VecTy).getTypeForEVT(*DAG.getContext()); -  const DataLayout &DL = DAG.getDataLayout(); -  unsigned NeedAlign = DL.getABITypeAlignment(Ty); -  if (HaveAlign >= NeedAlign || !ExpandUnalignedLoads) -    return Op; - -  unsigned HwLen = Subtarget.getVectorLength(); - -  SDValue Base = LN->getBasePtr(); -  SDValue Chain = LN->getChain(); -  auto BO = getBaseAndOffset(Base); -  unsigned BaseOpc = BO.first.getOpcode(); -  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % HwLen == 0) -    return Op; - -  const SDLoc &dl(Op); -  if (BO.second % HwLen != 0) { -    BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first, -                           DAG.getConstant(BO.second % HwLen, dl, MVT::i32)); -    BO.second -= BO.second % HwLen; -  } -  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR) -      ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first) -      : BO.first; -  SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl); -  SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+HwLen, dl); - -  MachineMemOperand *WideMMO = nullptr; -  if (MachineMemOperand *MMO = LN->getMemOperand()) { -    MachineFunction &MF = DAG.getMachineFunction(); -    WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(), -                    2*HwLen, HwLen, MMO->getAAInfo(), MMO->getRanges(), -                    MMO->getSyncScopeID(), MMO->getOrdering(), -                    MMO->getFailureOrdering()); -  } - -  SDValue Load0 = DAG.getLoad(VecTy, dl, Chain, Base0, WideMMO); -  SDValue Load1 = DAG.getLoad(VecTy, dl, Chain, Base1, WideMMO); - -  SDValue Aligned = getInstr(Hexagon::V6_valignb, dl, VecTy, -                             {Load1, Load0, BaseNoOff.getOperand(0)}, DAG); -  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, -                                 Load0.getValue(1), Load1.getValue(1)); -  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl); -  return M; -} - -SDValue  
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {    assert(!Op.isMachineOpcode());    SmallVector<SDValue,2> OpsL, OpsH; @@ -1465,7 +1408,6 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {      case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);      case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG); -    case ISD::LOAD:                    return LowerHvxUnalignedLoad(Op, DAG);      case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);      case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);      case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG); @@ -1478,6 +1420,8 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {      case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);      case ISD::SETCC:      case ISD::INTRINSIC_VOID:          return Op; +    // Unaligned loads will be handled by the default lowering. 
+    case ISD::LOAD:                    return SDValue();    }  #ifndef NDEBUG    Op.dumpr(&DAG); diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 99436aae201..105b6caf98f 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -2522,6 +2522,11 @@ bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, int Offset) const {      case MVT::i16:      case MVT::i32:      case MVT::i64: +    case MVT::v2i16: +    case MVT::v2i32: +    case MVT::v4i8: +    case MVT::v4i16: +    case MVT::v8i8:        return isInt<4>(Count);      // For HVX vectors the auto-inc is s3      case MVT::v64i8: diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td index 46bdafd228f..e0cff2ac238 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatterns.td +++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td @@ -100,6 +100,17 @@ def HWI8:   PatLeaf<(VecPI8  HvxWR:$R)>;  def HWI16:  PatLeaf<(VecPI16 HvxWR:$R)>;  def HWI32:  PatLeaf<(VecPI32 HvxWR:$R)>; +def SDTVecVecIntOp: +  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, +                       SDTCisVT<3,i32>]>; + +def HexagonVALIGN:     SDNode<"HexagonISD::VALIGN",     SDTVecVecIntOp>; +def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; + +def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), +                    (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; +def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; +  // Pattern fragments to extract the low and high subregisters from a  // 64-bit value.  
def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; @@ -1829,7 +1840,12 @@ let AddedComplexity = 20 in {    defm: Loadxi_pat<zextloadv2i8,    v2i16, anyimm1, L2_loadbzw2_io>;    defm: Loadxi_pat<zextloadv4i8,    v4i16, anyimm2, L2_loadbzw4_io>;    defm: Loadxi_pat<load,            i32,   anyimm2, L2_loadri_io>; +  defm: Loadxi_pat<load,            v2i16, anyimm2, L2_loadri_io>; +  defm: Loadxi_pat<load,            v4i8,  anyimm2, L2_loadri_io>;    defm: Loadxi_pat<load,            i64,   anyimm3, L2_loadrd_io>; +  defm: Loadxi_pat<load,            v2i32, anyimm3, L2_loadrd_io>; +  defm: Loadxi_pat<load,            v4i16, anyimm3, L2_loadrd_io>; +  defm: Loadxi_pat<load,            v8i8,  anyimm3, L2_loadrd_io>;    defm: Loadxi_pat<load,            f32,   anyimm2, L2_loadri_io>;    defm: Loadxi_pat<load,            f64,   anyimm3, L2_loadrd_io>;    // No sextloadi1. @@ -1867,10 +1883,15 @@ let AddedComplexity  = 60 in {    def: Loadxu_pat<zextloadi16,  i32,   anyimm1, L4_loadruh_ur>;    def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>;    def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; -  def: Loadxu_pat<load,         f32,   anyimm2, L4_loadri_ur>; -  def: Loadxu_pat<load,         f64,   anyimm3, L4_loadrd_ur>;    def: Loadxu_pat<load,         i32,   anyimm2, L4_loadri_ur>; +  def: Loadxu_pat<load,         v2i16, anyimm2, L4_loadri_ur>; +  def: Loadxu_pat<load,         v4i8,  anyimm2, L4_loadri_ur>;    def: Loadxu_pat<load,         i64,   anyimm3, L4_loadrd_ur>; +  def: Loadxu_pat<load,         v2i32, anyimm3, L4_loadrd_ur>; +  def: Loadxu_pat<load,         v4i16, anyimm3, L4_loadrd_ur>; +  def: Loadxu_pat<load,         v8i8,  anyimm3, L4_loadrd_ur>; +  def: Loadxu_pat<load,         f32,   anyimm2, L4_loadri_ur>; +  def: Loadxu_pat<load,         f64,   anyimm3, L4_loadrd_ur>;    def: Loadxum_pat<sextloadi8,  i64, anyimm0, ToSext64, L4_loadrb_ur>;    def: Loadxum_pat<zextloadi8,  i64, anyimm0, ToZext64, 
L4_loadrub_ur>; @@ -1884,29 +1905,39 @@ let AddedComplexity  = 60 in {  }  let AddedComplexity = 40 in { -  def: Loadxr_shl_pat<extloadi8,     i32, L4_loadrub_rr>; -  def: Loadxr_shl_pat<zextloadi8,    i32, L4_loadrub_rr>; -  def: Loadxr_shl_pat<sextloadi8,    i32, L4_loadrb_rr>; -  def: Loadxr_shl_pat<extloadi16,    i32, L4_loadruh_rr>; -  def: Loadxr_shl_pat<zextloadi16,   i32, L4_loadruh_rr>; -  def: Loadxr_shl_pat<sextloadi16,   i32, L4_loadrh_rr>; -  def: Loadxr_shl_pat<load,          i32, L4_loadri_rr>; -  def: Loadxr_shl_pat<load,          i64, L4_loadrd_rr>; -  def: Loadxr_shl_pat<load,          f32, L4_loadri_rr>; -  def: Loadxr_shl_pat<load,          f64, L4_loadrd_rr>; +  def: Loadxr_shl_pat<extloadi8,     i32,   L4_loadrub_rr>; +  def: Loadxr_shl_pat<zextloadi8,    i32,   L4_loadrub_rr>; +  def: Loadxr_shl_pat<sextloadi8,    i32,   L4_loadrb_rr>; +  def: Loadxr_shl_pat<extloadi16,    i32,   L4_loadruh_rr>; +  def: Loadxr_shl_pat<zextloadi16,   i32,   L4_loadruh_rr>; +  def: Loadxr_shl_pat<sextloadi16,   i32,   L4_loadrh_rr>; +  def: Loadxr_shl_pat<load,          i32,   L4_loadri_rr>; +  def: Loadxr_shl_pat<load,          v2i16, L4_loadri_rr>; +  def: Loadxr_shl_pat<load,          v4i8,  L4_loadri_rr>; +  def: Loadxr_shl_pat<load,          i64,   L4_loadrd_rr>; +  def: Loadxr_shl_pat<load,          v2i32, L4_loadrd_rr>; +  def: Loadxr_shl_pat<load,          v4i16, L4_loadrd_rr>; +  def: Loadxr_shl_pat<load,          v8i8,  L4_loadrd_rr>; +  def: Loadxr_shl_pat<load,          f32,   L4_loadri_rr>; +  def: Loadxr_shl_pat<load,          f64,   L4_loadrd_rr>;  }  let AddedComplexity = 20 in { -  def: Loadxr_add_pat<extloadi8,     i32, L4_loadrub_rr>; -  def: Loadxr_add_pat<zextloadi8,    i32, L4_loadrub_rr>; -  def: Loadxr_add_pat<sextloadi8,    i32, L4_loadrb_rr>; -  def: Loadxr_add_pat<extloadi16,    i32, L4_loadruh_rr>; -  def: Loadxr_add_pat<zextloadi16,   i32, L4_loadruh_rr>; -  def: Loadxr_add_pat<sextloadi16,   i32, L4_loadrh_rr>; -  def: 
Loadxr_add_pat<load,          i32, L4_loadri_rr>; -  def: Loadxr_add_pat<load,          i64, L4_loadrd_rr>; -  def: Loadxr_add_pat<load,          f32, L4_loadri_rr>; -  def: Loadxr_add_pat<load,          f64, L4_loadrd_rr>; +  def: Loadxr_add_pat<extloadi8,     i32,   L4_loadrub_rr>; +  def: Loadxr_add_pat<zextloadi8,    i32,   L4_loadrub_rr>; +  def: Loadxr_add_pat<sextloadi8,    i32,   L4_loadrb_rr>; +  def: Loadxr_add_pat<extloadi16,    i32,   L4_loadruh_rr>; +  def: Loadxr_add_pat<zextloadi16,   i32,   L4_loadruh_rr>; +  def: Loadxr_add_pat<sextloadi16,   i32,   L4_loadrh_rr>; +  def: Loadxr_add_pat<load,          i32,   L4_loadri_rr>; +  def: Loadxr_add_pat<load,          v2i16, L4_loadri_rr>; +  def: Loadxr_add_pat<load,          v4i8,  L4_loadri_rr>; +  def: Loadxr_add_pat<load,          i64,   L4_loadrd_rr>; +  def: Loadxr_add_pat<load,          v2i32, L4_loadrd_rr>; +  def: Loadxr_add_pat<load,          v4i16, L4_loadrd_rr>; +  def: Loadxr_add_pat<load,          v8i8,  L4_loadrd_rr>; +  def: Loadxr_add_pat<load,          f32,   L4_loadri_rr>; +  def: Loadxr_add_pat<load,          f64,   L4_loadrd_rr>;  }  let AddedComplexity = 40 in { @@ -1936,17 +1967,22 @@ let AddedComplexity = 20 in {  // Absolute address  let AddedComplexity  = 60 in { -  def: Loada_pat<zextloadi1,      i32, anyimm0, PS_loadrubabs>; -  def: Loada_pat<sextloadi8,      i32, anyimm0, PS_loadrbabs>; -  def: Loada_pat<extloadi8,       i32, anyimm0, PS_loadrubabs>; -  def: Loada_pat<zextloadi8,      i32, anyimm0, PS_loadrubabs>; -  def: Loada_pat<sextloadi16,     i32, anyimm1, PS_loadrhabs>; -  def: Loada_pat<extloadi16,      i32, anyimm1, PS_loadruhabs>; -  def: Loada_pat<zextloadi16,     i32, anyimm1, PS_loadruhabs>; -  def: Loada_pat<load,            i32, anyimm2, PS_loadriabs>; -  def: Loada_pat<load,            i64, anyimm3, PS_loadrdabs>; -  def: Loada_pat<load,            f32, anyimm2, PS_loadriabs>; -  def: Loada_pat<load,            f64, anyimm3, PS_loadrdabs>; +  def: 
Loada_pat<zextloadi1,      i32,   anyimm0, PS_loadrubabs>; +  def: Loada_pat<sextloadi8,      i32,   anyimm0, PS_loadrbabs>; +  def: Loada_pat<extloadi8,       i32,   anyimm0, PS_loadrubabs>; +  def: Loada_pat<zextloadi8,      i32,   anyimm0, PS_loadrubabs>; +  def: Loada_pat<sextloadi16,     i32,   anyimm1, PS_loadrhabs>; +  def: Loada_pat<extloadi16,      i32,   anyimm1, PS_loadruhabs>; +  def: Loada_pat<zextloadi16,     i32,   anyimm1, PS_loadruhabs>; +  def: Loada_pat<load,            i32,   anyimm2, PS_loadriabs>; +  def: Loada_pat<load,            v2i16, anyimm2, PS_loadriabs>; +  def: Loada_pat<load,            v4i8,  anyimm2, PS_loadriabs>; +  def: Loada_pat<load,            i64,   anyimm3, PS_loadrdabs>; +  def: Loada_pat<load,            v2i32, anyimm3, PS_loadrdabs>; +  def: Loada_pat<load,            v4i16, anyimm3, PS_loadrdabs>; +  def: Loada_pat<load,            v8i8,  anyimm3, PS_loadrdabs>; +  def: Loada_pat<load,            f32,   anyimm2, PS_loadriabs>; +  def: Loada_pat<load,            f64,   anyimm3, PS_loadrdabs>;    def: Loada_pat<atomic_load_8,   i32, anyimm0, PS_loadrubabs>;    def: Loada_pat<atomic_load_16,  i32, anyimm1, PS_loadruhabs>; @@ -1972,18 +2008,23 @@ let AddedComplexity  = 30 in {  // GP-relative address  let AddedComplexity  = 100 in { -  def: Loada_pat<extloadi1,       i32, addrgp,  L2_loadrubgp>; -  def: Loada_pat<zextloadi1,      i32, addrgp,  L2_loadrubgp>; -  def: Loada_pat<extloadi8,       i32, addrgp,  L2_loadrubgp>; -  def: Loada_pat<sextloadi8,      i32, addrgp,  L2_loadrbgp>; -  def: Loada_pat<zextloadi8,      i32, addrgp,  L2_loadrubgp>; -  def: Loada_pat<extloadi16,      i32, addrgp,  L2_loadruhgp>; -  def: Loada_pat<sextloadi16,     i32, addrgp,  L2_loadrhgp>; -  def: Loada_pat<zextloadi16,     i32, addrgp,  L2_loadruhgp>; -  def: Loada_pat<load,            i32, addrgp,  L2_loadrigp>; -  def: Loada_pat<load,            i64, addrgp,  L2_loadrdgp>; -  def: Loada_pat<load,            f32, addrgp,  L2_loadrigp>; -  
def: Loada_pat<load,            f64, addrgp,  L2_loadrdgp>; +  def: Loada_pat<extloadi1,       i32,   addrgp,  L2_loadrubgp>; +  def: Loada_pat<zextloadi1,      i32,   addrgp,  L2_loadrubgp>; +  def: Loada_pat<extloadi8,       i32,   addrgp,  L2_loadrubgp>; +  def: Loada_pat<sextloadi8,      i32,   addrgp,  L2_loadrbgp>; +  def: Loada_pat<zextloadi8,      i32,   addrgp,  L2_loadrubgp>; +  def: Loada_pat<extloadi16,      i32,   addrgp,  L2_loadruhgp>; +  def: Loada_pat<sextloadi16,     i32,   addrgp,  L2_loadrhgp>; +  def: Loada_pat<zextloadi16,     i32,   addrgp,  L2_loadruhgp>; +  def: Loada_pat<load,            i32,   addrgp,  L2_loadrigp>; +  def: Loada_pat<load,            v2i16, addrgp,  L2_loadrigp>; +  def: Loada_pat<load,            v4i8,  addrgp,  L2_loadrigp>; +  def: Loada_pat<load,            i64,   addrgp,  L2_loadrdgp>; +  def: Loada_pat<load,            v2i32, addrgp,  L2_loadrdgp>; +  def: Loada_pat<load,            v4i16, addrgp,  L2_loadrdgp>; +  def: Loada_pat<load,            v8i8,  addrgp,  L2_loadrdgp>; +  def: Loada_pat<load,            f32,   addrgp,  L2_loadrigp>; +  def: Loada_pat<load,            f64,   addrgp,  L2_loadrdgp>;    def: Loada_pat<atomic_load_8,   i32, addrgp,  L2_loadrubgp>;    def: Loada_pat<atomic_load_16,  i32, addrgp,  L2_loadruhgp>; diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td index a3dc8d3dabc..510c3cf83be 100644 --- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td @@ -16,12 +16,6 @@ def HwLen2: SDNodeXForm<imm, [{    return CurDAG->getTargetConstant(ST.getVectorLength()/2, SDLoc(N), MVT::i32);  }]>; -def NHwLen: SDNodeXForm<imm, [{ -  const auto &ST = static_cast<const HexagonSubtarget&>(CurDAG->getSubtarget()); -  return CurDAG->getTargetConstant(-int(ST.getVectorLength()), SDLoc(N), -                                   MVT::i32); -}]>; -  def Q2V: OutPatFrag<(ops node:$Qs), (V6_vandqrt $Qs, (A2_tfrsi 
-1))>;  def Combinev: OutPatFrag<(ops node:$Vs, node:$Vt), @@ -42,7 +36,6 @@ def HexagonVZERO:      SDNode<"HexagonISD::VZERO",      SDTVecLeaf>;  def HexagonQCAT:       SDNode<"HexagonISD::QCAT",       SDTVecBinOp>;  def HexagonQTRUE:      SDNode<"HexagonISD::QTRUE",      SDTVecLeaf>;  def HexagonQFALSE:     SDNode<"HexagonISD::QFALSE",     SDTVecLeaf>; -def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;  def vzero:  PatFrag<(ops), (HexagonVZERO)>;  def qtrue:  PatFrag<(ops), (HexagonQTRUE)>; @@ -51,7 +44,6 @@ def qcat:   PatFrag<(ops node:$Qs, node:$Qt),                      (HexagonQCAT node:$Qs, node:$Qt)>;  def qnot: PatFrag<(ops node:$Qs), (xor node:$Qs, qtrue)>; -def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;  def VSxtb: OutPatFrag<(ops node:$Vs), (V6_vunpackb  $Vs)>;  def VSxth: OutPatFrag<(ops node:$Vs), (V6_vunpackh  $Vs)>; @@ -175,8 +167,6 @@ let Predicates = [UseHVX] in {    def: Pat<(VecPI16 vzero), (Combinev (V6_vd0), (V6_vd0))>;    def: Pat<(VecPI32 vzero), (Combinev (V6_vd0), (V6_vd0))>; -  def: Pat<(valignaddr I32:$Rs), (A2_andir I32:$Rs, (NHwLen (i32 0)))>; -    def: Pat<(VecPI8 (concat_vectors HVI8:$Vs, HVI8:$Vt)),             (Combinev HvxVR:$Vt, HvxVR:$Vs)>;    def: Pat<(VecPI16 (concat_vectors HVI16:$Vs, HVI16:$Vt)), diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index c1e46e3f208..87121b30906 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -241,6 +241,12 @@ public:      return llvm::any_of(ElemTypes, [ElemTy] (MVT T) { return ElemTy == T; });    } +  unsigned getTypeAlignment(MVT Ty) const { +    if (isHVXVectorType(Ty, true)) +      return getVectorLength(); +    return Ty.getSizeInBits() / 8; +  } +    unsigned getL1CacheLineSize() const;    unsigned getL1PrefetchDistance() const; diff --git a/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll 
b/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll index 5494bd84fcc..ca1c1747013 100644 --- a/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/isel-expand-unaligned-loads.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -disable-packetizer < %s | FileCheck %s +; RUN: llc -march=hexagon -disable-packetizer -hexagon-align-loads < %s | FileCheck %s  ; CHECK-LABEL: test_00:  ; CHECK-DAG: v[[V00:[0-9]+]] = vmem(r[[B00:[0-9]+]]+#0) diff --git a/llvm/test/CodeGen/Hexagon/select-instr-align.ll b/llvm/test/CodeGen/Hexagon/select-instr-align.ll index 9d8939282c6..1021f924f1d 100644 --- a/llvm/test/CodeGen/Hexagon/select-instr-align.ll +++ b/llvm/test/CodeGen/Hexagon/select-instr-align.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -hvx-expand-unaligned-loads=0 < %s | FileCheck %s +; RUN: llc -march=hexagon -hexagon-align-loads=0 < %s | FileCheck %s  ; CHECK-LABEL: aligned_load:  ; CHECK: = vmem({{.*}}) diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll index 32abb75f20f..546ffdd66ff 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -O0 < %s | FileCheck %s +; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s  ; CHECK-LABEL: danny:  ; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0) diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll b/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll index f49a1e24a1b..83d56ed9ed4 100644 --- a/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll +++ b/llvm/test/CodeGen/Hexagon/vect/vect-v4i16.ll @@ -1,9 +1,7 @@  ; RUN: llc -march=hexagon -mcpu=hexagonv5 -disable-hsdr < %s | FileCheck %s  ; Check that store is post-incremented. 
-; CHECK: memuh(r{{[0-9]+}}+#6) -; CHECK: combine(r{{[0-9]+}},r{{[0-9]+}}) -; CHECK: vaddh +; CHECK: memh(r{{[0-9]+}}++#2)  target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"  target triple = "hexagon"  | 

