Diffstat (limited to 'llvm/lib')
17 files changed, 2649 insertions, 35 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConv.td b/llvm/lib/Target/AArch64/AArch64CallingConv.td index b880d8373de..bff7eebe00e 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConv.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConv.td @@ -61,7 +61,7 @@ def CC_A64_APCS : CallingConv<[    // Vectors and Floating-point types.    CCIfType<[v2i8], CCBitConvertToType<f16>>,    CCIfType<[v4i8, v2i16], CCBitConvertToType<f32>>, -  CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType<f64>>, +  CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64], CCBitConvertToType<f64>>,    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],             CCBitConvertToType<f128>>, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index d0abc0bbd11..44b691bfcce 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -42,6 +42,8 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {  AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)    : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) { +  const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>(); +    // SIMD compares set the entire lane's bits to 1    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -53,6 +55,21 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)    addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);    addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); +  if (Subtarget->hasNEON()) { +    // And the vectors +    addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); +    addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); +    addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); +    addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass); +    addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); +    addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); +    addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); +    addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); +    addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass); +    addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); +    addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); +  } +    computeRegisterProperties();    // We combine OR nodes for bitfield and NEON BSL operations. 
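Note: the "OR nodes" combine referenced in the comment above is PerformORCombine, which this patch extends to fold (or (and B, A), (and C, ~A)) with a constant splat A into the new AArch64ISD::NEON_BSL node. A standalone C++ sketch of the per-lane semantics (illustration only, not part of the patch; the helper name is hypothetical):

#include <cstdint>
#include <cassert>

// Bitwise select: the result takes TrueVal's bit where Mask is 1 and
// FalseVal's bit where Mask is 0 -- exactly what the BSL/BIT/BIF
// instructions compute per lane, and what (or (and B, A), (and C, ~A))
// computes with A as the mask.
static uint64_t bsl(uint64_t Mask, uint64_t TrueVal, uint64_t FalseVal) {
  return (TrueVal & Mask) | (FalseVal & ~Mask);
}

int main() {
  // High nibble comes from TrueVal, low nibble from FalseVal.
  assert(bsl(0xF0, 0xAB, 0xCD) == 0xAD);
  return 0;
}

The same identity underlies the BSL/BIT/BIF patterns defined in AArch64InstrNEON.td below.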
@@ -251,6 +268,31 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)    setExceptionPointerRegister(AArch64::X0);    setExceptionSelectorRegister(AArch64::X1); + +  if (Subtarget->hasNEON()) { +    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); +    setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + +    setOperationAction(ISD::SETCC, MVT::v8i8, Custom); +    setOperationAction(ISD::SETCC, MVT::v16i8, Custom); +    setOperationAction(ISD::SETCC, MVT::v4i16, Custom); +    setOperationAction(ISD::SETCC, MVT::v8i16, Custom); +    setOperationAction(ISD::SETCC, MVT::v2i32, Custom); +    setOperationAction(ISD::SETCC, MVT::v4i32, Custom); +    setOperationAction(ISD::SETCC, MVT::v2i64, Custom); +    setOperationAction(ISD::SETCC, MVT::v2f32, Custom); +    setOperationAction(ISD::SETCC, MVT::v4f32, Custom); +    setOperationAction(ISD::SETCC, MVT::v2f64, Custom); +  }  }  EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { @@ -777,7 +819,22 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {    case AArch64ISD::WrapperLarge:   return "AArch64ISD::WrapperLarge";    case AArch64ISD::WrapperSmall:   return "AArch64ISD::WrapperSmall"; -  default:                       return NULL; +  case AArch64ISD::NEON_BSL: +    return "AArch64ISD::NEON_BSL"; +  case AArch64ISD::NEON_MOVIMM: +    return "AArch64ISD::NEON_MOVIMM"; +  case AArch64ISD::NEON_MVNIMM: +    return "AArch64ISD::NEON_MVNIMM"; +  case AArch64ISD::NEON_FMOVIMM: +    return "AArch64ISD::NEON_FMOVIMM"; +  case AArch64ISD::NEON_CMP: +    return "AArch64ISD::NEON_CMP"; +  case AArch64ISD::NEON_CMPZ: +    return "AArch64ISD::NEON_CMPZ"; +  case AArch64ISD::NEON_TST: +    return "AArch64ISD::NEON_TST"; +  default: +    return NULL;    }  } @@ -2230,6 +2287,213 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {                       DAG.getConstant(A64CC::NE, MVT::i32));  } +static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { +  SDLoc DL(Op); +  SDValue LHS = Op.getOperand(0); +  SDValue RHS = Op.getOperand(1); +  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); +  EVT VT = Op.getValueType(); +  bool Invert = false; +  SDValue Op0, Op1; +  unsigned Opcode; + +  if (LHS.getValueType().isInteger()) { + +    // Attempt to use Vector Integer Compare Mask Test instruction. +    // TST = icmp ne (and (op0, op1), zero). +    if (CC == ISD::SETNE) { +      if (((LHS.getOpcode() == ISD::AND) && +           ISD::isBuildVectorAllZeros(RHS.getNode())) || +          ((RHS.getOpcode() == ISD::AND) && +           ISD::isBuildVectorAllZeros(LHS.getNode()))) { + +        SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? 
LHS : RHS; +        SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0)); +        SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1)); +        return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS); +      } +    } + +    // Attempt to use Vector Integer Compare Mask against Zero instr (Signed). +    // Note: Compare against Zero does not support unsigned predicates. +    if ((ISD::isBuildVectorAllZeros(RHS.getNode()) || +         ISD::isBuildVectorAllZeros(LHS.getNode())) && +        !isUnsignedIntSetCC(CC)) { + +      // If LHS is the zero value, swap operands and CondCode. +      if (ISD::isBuildVectorAllZeros(LHS.getNode())) { +        CC = getSetCCSwappedOperands(CC); +        Op0 = RHS; +      } else +        Op0 = LHS; + +      // Ensure valid CondCode for Compare Mask against Zero instruction: +      // EQ, GE, GT, LE, LT. +      if (ISD::SETNE == CC) { +        Invert = true; +        CC = ISD::SETEQ; +      } + +      // Using constant type to differentiate integer and FP compares with zero. +      Op1 = DAG.getConstant(0, MVT::i32); +      Opcode = AArch64ISD::NEON_CMPZ; + +    } else { +      // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned). +      // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT. +      bool Swap = false; +      switch (CC) { +      default: +        llvm_unreachable("Illegal integer comparison."); +      case ISD::SETEQ: +      case ISD::SETGT: +      case ISD::SETGE: +      case ISD::SETUGT: +      case ISD::SETUGE: +        break; +      case ISD::SETNE: +        Invert = true; +        CC = ISD::SETEQ; +        break; +      case ISD::SETULT: +      case ISD::SETULE: +      case ISD::SETLT: +      case ISD::SETLE: +        Swap = true; +        CC = getSetCCSwappedOperands(CC); +      } + +      if (Swap) +        std::swap(LHS, RHS); + +      Opcode = AArch64ISD::NEON_CMP; +      Op0 = LHS; +      Op1 = RHS; +    } + +    // Generate Compare Mask instr or Compare Mask against Zero instr. +    SDValue NeonCmp = +        DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + +    if (Invert) +      NeonCmp = DAG.getNOT(DL, NeonCmp, VT); + +    return NeonCmp; +  } + +  // Now handle Floating Point cases. +  // Attempt to use Vector Floating Point Compare Mask against Zero instruction. +  if (ISD::isBuildVectorAllZeros(RHS.getNode()) || +      ISD::isBuildVectorAllZeros(LHS.getNode())) { + +    // If LHS is the zero value, swap operands and CondCode. +    if (ISD::isBuildVectorAllZeros(LHS.getNode())) { +      CC = getSetCCSwappedOperands(CC); +      Op0 = RHS; +    } else +      Op0 = LHS; + +    // Using constant type to differentiate integer and FP compares with zero. +    Op1 = DAG.getConstantFP(0, MVT::f32); +    Opcode = AArch64ISD::NEON_CMPZ; +  } else { +    // Attempt to use Vector Floating Point Compare Mask instruction. +    Op0 = LHS; +    Op1 = RHS; +    Opcode = AArch64ISD::NEON_CMP; +  } + +  SDValue NeonCmpAlt; +  // Some register compares have to be implemented with swapped CC and operands, +  // e.g.: OLT implemented as OGT with swapped operands. +  bool SwapIfRegArgs = false; + +  // Ensure valid CondCode for FP Compare Mask against Zero instruction: +  // EQ, GE, GT, LE, LT. +  // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT. 
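+  // The FP compare-mask instructions only provide the ordered predicates
+  // (EQ, GE, GT, with LE/LT needing swapped operands in the register form),
+  // so each unordered predicate below is lowered as the inversion of its
+  // ordered complement, e.g. SETUGE == !(SETOLT).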
+  switch (CC) { +  default: +    llvm_unreachable("Illegal FP comparison"); +  case ISD::SETUNE: +  case ISD::SETNE: +    Invert = true; // Fallthrough +  case ISD::SETOEQ: +  case ISD::SETEQ: +    CC = ISD::SETEQ; +    break; +  case ISD::SETOLT: +  case ISD::SETLT: +    CC = ISD::SETLT; +    SwapIfRegArgs = true; +    break; +  case ISD::SETOGT: +  case ISD::SETGT: +    CC = ISD::SETGT; +    break; +  case ISD::SETOLE: +  case ISD::SETLE: +    CC = ISD::SETLE; +    SwapIfRegArgs = true; +    break; +  case ISD::SETOGE: +  case ISD::SETGE: +    CC = ISD::SETGE; +    break; +  case ISD::SETUGE: +    Invert = true; +    CC = ISD::SETLT; +    SwapIfRegArgs = true; +    break; +  case ISD::SETULE: +    Invert = true; +    CC = ISD::SETGT; +    break; +  case ISD::SETUGT: +    Invert = true; +    CC = ISD::SETLE; +    SwapIfRegArgs = true; +    break; +  case ISD::SETULT: +    Invert = true; +    CC = ISD::SETGE; +    break; +  case ISD::SETUEQ: +    Invert = true; // Fallthrough +  case ISD::SETONE: +    // Expand this to (OGT |OLT). +    NeonCmpAlt = +        DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT)); +    CC = ISD::SETLT; +    SwapIfRegArgs = true; +    break; +  case ISD::SETUO: +    Invert = true; // Fallthrough +  case ISD::SETO: +    // Expand this to (OGE | OLT). +    NeonCmpAlt = +        DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE)); +    CC = ISD::SETLT; +    SwapIfRegArgs = true; +    break; +  } + +  if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) { +    CC = getSetCCSwappedOperands(CC); +    std::swap(Op0, Op1); +  } + +  // Generate FP Compare Mask instr or FP Compare Mask against Zero instr +  SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + +  if (NeonCmpAlt.getNode()) +    NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt); + +  if (Invert) +    NeonCmp = DAG.getNOT(DL, NeonCmp, VT); + +  return NeonCmp; +} +  // (SETCC lhs, rhs, condcode)  SDValue  AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { @@ -2239,6 +2503,9 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();    EVT VT = Op.getValueType(); +  if (VT.isVector()) +    return LowerVectorSETCC(Op, DAG); +    if (LHS.getValueType() == MVT::f128) {      // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS      // for the rest of the function (some i32 or i64 values). @@ -2395,11 +2662,155 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {    case ISD::SETCC: return LowerSETCC(Op, DAG);    case ISD::VACOPY: return LowerVACOPY(Op, DAG);    case ISD::VASTART: return LowerVASTART(Op, DAG); +  case ISD::BUILD_VECTOR: +    return LowerBUILD_VECTOR(Op, DAG, getSubtarget());    }    return SDValue();  } +/// Check if the specified splat value corresponds to a valid vector constant +/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI).  If +/// so, return the encoded 8-bit immediate and the OpCmode instruction fields +/// values. 
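+/// For example, a v4i16 splat of 0x5600 matches the "0x00nn LSL 8" form
+/// handled in the 16-bit case below, giving Imm = 0x56 and OpCmode = 0b1010;
+/// the caller then rebuilds the constant with a NEON_MOVIMM node.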
+static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, +                              unsigned SplatBitSize, SelectionDAG &DAG, +                              bool is128Bits, NeonModImmType type, EVT &VT, +                              unsigned &Imm, unsigned &OpCmode) { +  switch (SplatBitSize) { +  default: +    llvm_unreachable("unexpected size for isNeonModifiedImm"); +  case 8: { +    if (type != Neon_Mov_Imm) +      return false; +    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); +    // Neon movi per byte: Op=0, Cmode=1110. +    OpCmode = 0xe; +    Imm = SplatBits; +    VT = is128Bits ? MVT::v16i8 : MVT::v8i8; +    break; +  } +  case 16: { +    // Neon move inst per halfword +    VT = is128Bits ? MVT::v8i16 : MVT::v4i16; +    if ((SplatBits & ~0xff) == 0) { +      // Value = 0x00nn is 0x00nn LSL 0 +      // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000 +      // bic:  Op=1, Cmode=1001;  orr:  Op=0, Cmode=1001 +      // Op=x, Cmode=100y +      Imm = SplatBits; +      OpCmode = 0x8; +      break; +    } +    if ((SplatBits & ~0xff00) == 0) { +      // Value = 0xnn00 is 0x00nn LSL 8 +      // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010 +      // bic:  Op=1, Cmode=1011;  orr:  Op=0, Cmode=1011 +      // Op=x, Cmode=101x +      Imm = SplatBits >> 8; +      OpCmode = 0xa; +      break; +    } +    // can't handle any other +    return false; +  } + +  case 32: { +    // First the LSL variants (MSL is unusable by some interested instructions). + +    // Neon move instr per word, shift zeros +    VT = is128Bits ? MVT::v4i32 : MVT::v2i32; +    if ((SplatBits & ~0xff) == 0) { +      // Value = 0x000000nn is 0x000000nn LSL 0 +      // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000 +      // bic:  Op=1, Cmode= 0001; orr:  Op=0, Cmode= 0001 +      // Op=x, Cmode=000x +      Imm = SplatBits; +      OpCmode = 0; +      break; +    } +    if ((SplatBits & ~0xff00) == 0) { +      // Value = 0x0000nn00 is 0x000000nn LSL 8 +      // movi: Op=0, Cmode= 0010;  mvni: Op=1, Cmode= 0010 +      // bic:  Op=1, Cmode= 0011;  orr : Op=0, Cmode= 0011 +      // Op=x, Cmode=001x +      Imm = SplatBits >> 8; +      OpCmode = 0x2; +      break; +    } +    if ((SplatBits & ~0xff0000) == 0) { +      // Value = 0x00nn0000 is 0x000000nn LSL 16 +      // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100 +      // bic:  Op=1, Cmode= 0101; orr:  Op=0, Cmode= 0101 +      // Op=x, Cmode=010x +      Imm = SplatBits >> 16; +      OpCmode = 0x4; +      break; +    } +    if ((SplatBits & ~0xff000000) == 0) { +      // Value = 0xnn000000 is 0x000000nn LSL 24 +      // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110 +      // bic:  Op=1, Cmode= 0111; orr:  Op=0, Cmode= 0111 +      // Op=x, Cmode=011x +      Imm = SplatBits >> 24; +      OpCmode = 0x6; +      break; +    } + +    // Now the MSL immediates. 
+
+    // Neon move instr per word, shift ones
+    if ((SplatBits & ~0xffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+      // Value = 0x0000nnff is 0x000000nn MSL 8
+      // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
+      // Op=x, Cmode=1100
+      Imm = SplatBits >> 8;
+      OpCmode = 0xc;
+      break;
+    }
+    if ((SplatBits & ~0xffffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+      // Value = 0x00nnffff is 0x000000nn MSL 16
+      // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
+      // Op=x, Cmode=1101
+      Imm = SplatBits >> 16;
+      OpCmode = 0xd;
+      break;
+    }
+    // can't handle any other
+    return false;
+  }
+
+  case 64: {
+    if (type != Neon_Mov_Imm)
+      return false;
+    // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
+    // movi Op=1, Cmode=1110.
+    OpCmode = 0x1e;
+    uint64_t BitMask = 0xff;
+    uint64_t Val = 0;
+    unsigned ImmMask = 1;
+    Imm = 0;
+    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+        Val |= BitMask;
+        Imm |= ImmMask;
+      } else if ((SplatBits & BitMask) != 0) {
+        return false;
+      }
+      BitMask <<= 8;
+      ImmMask <<= 1;
+    }
+    SplatBits = Val;
+    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+    break;
+  }
+  }
+
+  return true;
+}
+
 static SDValue PerformANDCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI) {
@@ -2725,6 +3136,7 @@ static SDValue PerformORCombine(SDNode *N,
                                 const AArch64Subtarget *Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
+  SDLoc DL(N);
   EVT VT = N->getValueType(0);
   if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -2745,6 +3157,44 @@ static SDValue PerformORCombine(SDNode *N,
   if (Res.getNode())
     return Res;
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  // Attempt to use vector immediate-form BSL
+  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+    APInt SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+    APInt SplatBits0;
+    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+                                      HasAnyUndefs) &&
+        !HasAnyUndefs) {
+      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+      APInt SplatBits1;
+      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+                                        HasAnyUndefs) &&
+          !HasAnyUndefs && SplatBits0 == ~SplatBits1) {
+        // Canonicalize the vector type to make instruction selection simpler.
+        EVT CanonicalVT = VT.is128BitVector() ?
MVT::v16i8 : MVT::v8i8; +        SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT, +                                     N0->getOperand(1), N0->getOperand(0), +                                     N1->getOperand(0)); +        return DAG.getNode(ISD::BITCAST, DL, VT, Result); +      } +    } +  } +    return SDValue();  } @@ -2819,6 +3269,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {    return false;  } +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. +SDValue +AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, +                                         const AArch64Subtarget *ST) const { + +  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); +  SDLoc DL(Op); +  EVT VT = Op.getValueType(); + +  APInt SplatBits, SplatUndef; +  unsigned SplatBitSize; +  bool HasAnyUndefs; + +  // Note we favor lowering MOVI over MVNI. +  // This has implications on the definition of patterns in TableGen to select +  // BIC immediate instructions but not ORR immediate instructions. +  // If this lowering order is changed, TableGen patterns for BIC immediate and +  // ORR immediate instructions have to be updated. +  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { +    if (SplatBitSize <= 64) { +      // First attempt to use vector immediate-form MOVI +      EVT NeonMovVT; +      unsigned Imm = 0; +      unsigned OpCmode = 0; + +      if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), +                            SplatBitSize, DAG, VT.is128BitVector(), +                            Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) { +        SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); +        SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); + +        if (ImmVal.getNode() && OpCmodeVal.getNode()) { +          SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, +                                        ImmVal, OpCmodeVal); +          return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); +        } +      } + +      // Then attempt to use vector immediate-form MVNI +      uint64_t NegatedImm = (~SplatBits).getZExtValue(); +      if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, +                            DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT, +                            Imm, OpCmode)) { +        SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); +        SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); +        if (ImmVal.getNode() && OpCmodeVal.getNode()) { +          SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, +                                        ImmVal, OpCmodeVal); +          return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); +        } +      } + +      // Attempt to use vector immediate-form FMOV +      if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) || +          (VT == MVT::v2f64 && SplatBitSize == 64)) { +        APFloat RealVal( +            SplatBitSize == 32 ? 
APFloat::IEEEsingle : APFloat::IEEEdouble, +            SplatBits); +        uint32_t ImmVal; +        if (A64Imms::isFPImm(RealVal, ImmVal)) { +          SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); +          return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val); +        } +      } +    } +  } +  return SDValue(); +} +  AArch64TargetLowering::ConstraintType  AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {    if (Constraint.size() == 1) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 320346e60b7..67a908e24ef 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -111,7 +111,28 @@ namespace AArch64ISD {      // created using the small memory model style: i.e. adrp/add or      // adrp/mem-op. This exists to prevent bare TargetAddresses which may never      // get selected. -    WrapperSmall +    WrapperSmall, + +    // Vector bitwise select +    NEON_BSL, + +    // Vector move immediate +    NEON_MOVIMM, + +    // Vector Move Inverted Immediate +    NEON_MVNIMM, + +    // Vector FP move immediate +    NEON_FMOVIMM, + +    // Vector compare +    NEON_CMP, + +    // Vector compare zero +    NEON_CMPZ, + +    // Vector compare bitwise test +    NEON_TST    };  } @@ -148,9 +169,11 @@ public:                            SDLoc dl, SelectionDAG &DAG,                            SmallVectorImpl<SDValue> &InVals) const; -  void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, -                           SDLoc DL, SDValue &Chain) const; +  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, +                            const AArch64Subtarget *ST) const; +  void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, +                           SDValue &Chain) const;    /// IsEligibleForTailCallOptimization - Check whether the call is eligible    /// for tail call optimization. Targets which want to do tail call @@ -253,6 +276,10 @@ private:      return &getTargetMachine().getSubtarget<AArch64Subtarget>();    }  }; +enum NeonModImmType { +  Neon_Mov_Imm, +  Neon_Mvn_Imm +};  } // namespace llvm  #endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 9dd122f1494..09451fdc45d 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -959,3 +959,96 @@ class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,    let Inst{4-0}   = op4;  } + +//===----------------------------------------------------------------------===// +// +// Neon Instruction Format Definitions. 
+// + +let Predicates = [HasNEON] in { + +class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1> +  : InstAlias<Asm, Result, Emit> { +} + +// Format AdvSIMD 3 vector registers with same vector type +class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode, +                   dag outs, dag ins, string asmstr, +                   list<dag> patterns, InstrItinClass itin> +  : A64InstRdnm<outs, ins, asmstr, patterns, itin> +{ +  let Inst{31} = 0b0; +  let Inst{30} = q; +  let Inst{29} = u; +  let Inst{28-24} = 0b01110; +  let Inst{23-22} = size; +  let Inst{21} = 0b1; +   // Inherit Rm in 20-16 +  let Inst{15-11} = opcode; +  let Inst{10} = 0b1; +  // Inherit Rn in 9-5 +  // Inherit Rd in 4-0 +} + +// Format AdvSIMD 1 vector register with modified immediate +class NeonI_1VModImm<bit q, bit op, +                     dag outs, dag ins, string asmstr, +                     list<dag> patterns, InstrItinClass itin> +  : A64InstRd<outs,ins, asmstr, patterns, itin> +{ +  bits<8> Imm; +  bits<4> cmode; +  let Inst{31} = 0b0; +  let Inst{30} = q; +  let Inst{29} = op; +  let Inst{28-19} = 0b0111100000; +  let Inst{15-12} = cmode; +  let Inst{11} = 0b0; // o2 +  let Inst{10} = 1; +  // Inherit Rd in 4-0 +  let Inst{18-16} = Imm{7-5}; // imm a:b:c +  let Inst{9-5} = Imm{4-0};   // imm d:e:f:g:h +} + +// Format AdvSIMD 3 scalar registers with same type + +class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode, +                          dag outs, dag ins, string asmstr, +                          list<dag> patterns, InstrItinClass itin> +  : A64InstRdnm<outs, ins, asmstr, patterns, itin> +{ +  let Inst{31} = 0b0; +  let Inst{30} = 0b1; +  let Inst{29} = u; +  let Inst{28-24} = 0b11110; +  let Inst{23-22} = size; +  let Inst{21} = 0b1; +   // Inherit Rm in 20-16 +  let Inst{15-11} = opcode; +  let Inst{10} = 0b1; +  // Inherit Rn in 9-5 +  // Inherit Rd in 4-0 +} + + +// Format AdvSIMD 2 vector registers miscellaneous +class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode, +                   dag outs, dag ins, string asmstr, +                   list<dag> patterns, InstrItinClass itin> +  : A64InstRdn<outs, ins, asmstr, patterns, itin> +{ +  let Inst{31} = 0b0; +  let Inst{30} = q; +  let Inst{29} = u; +  let Inst{28-24} = 0b01110; +  let Inst{23-22} = size; +  let Inst{21-17} = 0b10000; +  let Inst{16-12} = opcode; +  let Inst{11-10} = 0b10; + +  // Inherit Rn in 9-5 +  // Inherit Rd in 4-0 +} + +} + diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 725a12164be..07289b0be14 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -11,6 +11,17 @@  //  //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasNEON          : Predicate<"Subtarget->hasNEON()">, +                                 AssemblerPredicate<"FeatureNEON", "neon">; +def HasCrypto        : Predicate<"Subtarget->hasCrypto()">, +                                 AssemblerPredicate<"FeatureCrypto","crypto">; + +// Use fused MAC if more precision in FP computation is allowed. 
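+// (FPOpFusion::Fast corresponds to the -ffp-contract=fast setting; the vector
+// FMLA/FMLS patterns in AArch64InstrNEON.td are guarded by this predicate too.)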
+def UseFusedMAC      : Predicate<"(TM.Options.AllowFPOpFusion ==" +                                 " FPOpFusion::Fast)">;  include "AArch64InstrFormats.td"  //===----------------------------------------------------------------------===// @@ -2173,6 +2184,29 @@ def FMSUBdddd  : A64I_fpdp3Impl<"fmsub",  FPR64, f64, 0b01, 0b0, 0b1, fmsub>;  def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;  def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; +// Extra patterns for when we're allowed to optimise separate multiplication and +// addition. +let Predicates = [UseFusedMAC] in { +def : Pat<(fadd FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +          (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(fsub FPR32:$Ra, (fmul FPR32:$Rn, FPR32:$Rm)), +          (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra), +          (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), +          (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +          (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), +          (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra), +          (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)), +          (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +} + +  //===----------------------------------------------------------------------===//  // Floating-point <-> fixed-point conversion instructions  //===----------------------------------------------------------------------===// @@ -5123,3 +5157,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),  defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),                     (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "AArch64InstrNEON.td"
\ No newline at end of file diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td new file mode 100644 index 00000000000..98b9e3e1158 --- /dev/null +++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -0,0 +1,1634 @@ +//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the AArch64 NEON instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific DAG Nodes. +//===----------------------------------------------------------------------===// +def Neon_bsl       : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3, +                      [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, +                      SDTCisSameAs<0, 3>]>>; + +// (outs Result), (ins Imm, OpCmode) +def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; + +def Neon_movi     : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>; + +def Neon_mvni     : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>; + +// (outs Result), (ins Imm) +def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1, +                        [SDTCisVec<0>, SDTCisVT<1, i32>]>>; + +// (outs Result), (ins LHS, RHS, CondCode) +def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3, +                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>; + +// (outs Result), (ins LHS, 0/0.0 constant, CondCode) +def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, +                 [SDTCisVec<0>,  SDTCisVec<1>]>>; + +// (outs Result), (ins LHS, RHS) +def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, +                 [SDTCisVec<0>,  SDTCisSameAs<1, 2>]>>; + +//===----------------------------------------------------------------------===// +// Multiclasses +//===----------------------------------------------------------------------===// + +multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size,  bits<5> opcode, +                                string asmop, SDPatternOperator opnode8B, +                                SDPatternOperator opnode16B, +                                bit Commutable = 0> +{ +  let isCommutable = Commutable in { +    def _8B :  NeonI_3VSame<0b0, u, size, opcode, +               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), +               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", +               [(set (v8i8 VPR64:$Rd), +                  (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], +               NoItinerary>; + +    def _16B : NeonI_3VSame<0b1, u, size, opcode, +               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", +               [(set (v16i8 VPR128:$Rd), +                  (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], +               NoItinerary>; +  } + +} + +multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode, +                                  string asmop, SDPatternOperator opnode, +                                  bit Commutable = 0> +{ +  let isCommutable = Commutable in { +    def _4H : NeonI_3VSame<0b0, u, 0b01, opcode, +              (outs VPR64:$Rd), (ins VPR64:$Rn, 
VPR64:$Rm), +              asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", +              [(set (v4i16 VPR64:$Rd), +                 (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], +              NoItinerary>; + +    def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, +              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +              asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", +              [(set (v8i16 VPR128:$Rd), +                 (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], +              NoItinerary>; + +    def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, +              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), +              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", +              [(set (v2i32 VPR64:$Rd), +                 (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], +              NoItinerary>; + +    def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, +              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", +              [(set (v4i32 VPR128:$Rd), +                 (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], +              NoItinerary>; +  } +} +multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode, +                                  string asmop, SDPatternOperator opnode, +                                  bit Commutable = 0> +   : NeonI_3VSame_HS_sizes<u, opcode,  asmop, opnode, Commutable> +{ +  let isCommutable = Commutable in { +    def _8B :  NeonI_3VSame<0b0, u, 0b00, opcode, +               (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), +               asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", +               [(set (v8i8 VPR64:$Rd), +                  (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], +               NoItinerary>; + +    def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, +               (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +               asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", +               [(set (v16i8 VPR128:$Rd), +                  (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], +               NoItinerary>; +  } +} + +multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode, +                                   string asmop, SDPatternOperator opnode, +                                   bit Commutable = 0> +   : NeonI_3VSame_BHS_sizes<u, opcode,  asmop, opnode, Commutable> +{ +  let isCommutable = Commutable in { +    def _2D : NeonI_3VSame<0b1, u, 0b11, opcode, +              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", +              [(set (v2i64 VPR128:$Rd), +                 (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], +              NoItinerary>; +  } +} + +// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, +// but Result types can be integer or floating point types. 
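+// For example, the FCMEQ/FCMGE/FCMGT definitions below instantiate it with
+// v2i32/v4i32/v2i64 results over v2f32/v4f32/v2f64 operands.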
+multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode, +                                 string asmop, SDPatternOperator opnode2S, +                                 SDPatternOperator opnode4S, +                                 SDPatternOperator opnode2D, +                                 ValueType ResTy2S, ValueType ResTy4S, +                                 ValueType ResTy2D, bit Commutable = 0> +{ +  let isCommutable = Commutable in { +    def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode, +              (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), +              asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", +              [(set (ResTy2S VPR64:$Rd), +                 (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], +              NoItinerary>; + +    def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, +              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +              asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", +              [(set (ResTy4S VPR128:$Rd), +                 (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], +              NoItinerary>; + +    def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, +              (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), +              asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", +              [(set (ResTy2D VPR128:$Rd), +                 (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], +               NoItinerary>; +  } +} + +//===----------------------------------------------------------------------===// +// Instruction Definitions +//===----------------------------------------------------------------------===// + +// Vector Arithmetic Instructions + +// Vector Add (Integer and Floating-Point) + +defm ADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; +defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd, +                                     v2f32, v4f32, v2f64, 1>; + +// Vector Sub (Integer and Floating-Point) + +defm SUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; +defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub, +                                     v2f32, v4f32, v2f64, 0>; + +// Vector Multiply (Integer and Floating-Point) + +defm MULvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; +defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul, +                                     v2f32, v4f32, v2f64, 1>; + +// Vector Multiply (Polynomial) + +defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", +                                    int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; + +// Vector Multiply-accumulate and Multiply-subtract (Integer) + +// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and +// two operands constraints. 
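+// The extra $src input is tied to $Rd (Constraints = "$src = $Rd"), modelling
+// accumulating instructions such as MLA/MLS and FMLA/FMLS that read and write
+// the destination register.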
+class NeonI_3VSame_Constraint_impl<string asmop, string asmlane, +  RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode, +  SDPatternOperator opnode> +  : NeonI_3VSame<q, u, size, opcode, +    (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm), +    asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane, +    [(set (OpTy VPRC:$Rd), +       (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))], +    NoItinerary> { +  let Constraints = "$src = $Rd"; +} + +def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                       (add node:$Ra, (mul node:$Rn, node:$Rm))>; + +def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                       (sub node:$Ra, (mul node:$Rn, node:$Rm))>; + + +def MLAvvv_8B:  NeonI_3VSame_Constraint_impl<"mla", ".8b",  VPR64,  v8i8, +                                             0b0, 0b0, 0b00, 0b10010, Neon_mla>; +def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, +                                             0b1, 0b0, 0b00, 0b10010, Neon_mla>; +def MLAvvv_4H:  NeonI_3VSame_Constraint_impl<"mla", ".4h",  VPR64,  v4i16, +                                             0b0, 0b0, 0b01, 0b10010, Neon_mla>; +def MLAvvv_8H:  NeonI_3VSame_Constraint_impl<"mla", ".8h",  VPR128, v8i16, +                                             0b1, 0b0, 0b01, 0b10010, Neon_mla>; +def MLAvvv_2S:  NeonI_3VSame_Constraint_impl<"mla", ".2s",  VPR64,  v2i32, +                                             0b0, 0b0, 0b10, 0b10010, Neon_mla>; +def MLAvvv_4S:  NeonI_3VSame_Constraint_impl<"mla", ".4s",  VPR128, v4i32, +                                             0b1, 0b0, 0b10, 0b10010, Neon_mla>; + +def MLSvvv_8B:  NeonI_3VSame_Constraint_impl<"mls", ".8b",  VPR64,  v8i8, +                                             0b0, 0b1, 0b00, 0b10010, Neon_mls>; +def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8, +                                             0b1, 0b1, 0b00, 0b10010, Neon_mls>; +def MLSvvv_4H:  NeonI_3VSame_Constraint_impl<"mls", ".4h",  VPR64,  v4i16, +                                             0b0, 0b1, 0b01, 0b10010, Neon_mls>; +def MLSvvv_8H:  NeonI_3VSame_Constraint_impl<"mls", ".8h",  VPR128, v8i16, +                                             0b1, 0b1, 0b01, 0b10010, Neon_mls>; +def MLSvvv_2S:  NeonI_3VSame_Constraint_impl<"mls", ".2s",  VPR64,  v2i32, +                                             0b0, 0b1, 0b10, 0b10010, Neon_mls>; +def MLSvvv_4S:  NeonI_3VSame_Constraint_impl<"mls", ".4s",  VPR128, v4i32, +                                             0b1, 0b1, 0b10, 0b10010, Neon_mls>; + +// Vector Multiply-accumulate and Multiply-subtract (Floating Point) + +def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                        (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>; + +def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                        (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>; + +let Predicates = [HasNEON, UseFusedMAC] in { +def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s",  VPR64,  v2f32, +                                             0b0, 0b0, 0b00, 0b11001, Neon_fmla>; +def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s",  VPR128, v4f32, +                                             0b1, 0b0, 0b00, 0b11001, Neon_fmla>; +def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d",  VPR128, v2f64, +                                             0b1, 0b0, 0b01, 0b11001, Neon_fmla>; + +def FMLSvvv_2S: 
NeonI_3VSame_Constraint_impl<"fmls", ".2s",  VPR64,  v2f32, +                                              0b0, 0b0, 0b10, 0b11001, Neon_fmls>; +def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s",  VPR128, v4f32, +                                             0b1, 0b0, 0b10, 0b11001, Neon_fmls>; +def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d",  VPR128, v2f64, +                                             0b1, 0b0, 0b11, 0b11001, Neon_fmls>; +} + +// We're also allowed to match the fma instruction regardless of compile +// options. +def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)), +          (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; +def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), +          (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; +def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), +          (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; + +def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)), +          (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; +def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), +          (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; +def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), +          (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; + +// Vector Divide (Floating-Point) + +defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv, +                                     v2f32, v4f32, v2f64, 0>; + +// Vector Bitwise Operations + +// Vector Bitwise AND + +defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>; + +// Vector Bitwise Exclusive OR + +defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>; + +// Vector Bitwise OR + +defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>; + +// ORR disassembled as MOV if Vn==Vm + +// Vector Move - register +// Alias for ORR if Vn=Vm and it is the preferred syntax +def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", +                    (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; +def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", +                    (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; + +def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{ +  ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0)); +  ConstantSDNode *OpCmodeConstVal = cast<ConstantSDNode>(N->getOperand(1)); +  unsigned EltBits; +  uint64_t EltVal = A64Imms::decodeNeonModImm(ImmConstVal->getZExtValue(), +    OpCmodeConstVal->getZExtValue(), EltBits); +  return (EltBits == 8 && EltVal == 0xff); +}]>; + + +def Neon_not8B  : PatFrag<(ops node:$in), +                          (xor node:$in, (bitconvert (v8i8 Neon_immAllOnes)))>; +def Neon_not16B : PatFrag<(ops node:$in), +                          (xor node:$in, (bitconvert (v16i8 Neon_immAllOnes)))>; + +def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm), +                         (or node:$Rn, (Neon_not8B node:$Rm))>; + +def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm), +                          (or node:$Rn, (Neon_not16B node:$Rm))>; + +def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm), +                         (and node:$Rn, (Neon_not8B node:$Rm))>; + +def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm), +                          (and node:$Rn, (Neon_not16B node:$Rm))>; + + +// Vector Bitwise OR NOT - register + +defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn", +                                   Neon_orn8B, Neon_orn16B, 0>; + +// Vector Bitwise Bit Clear (AND NOT) - 
register + +defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic", +                                   Neon_bic8B, Neon_bic16B, 0>; + +multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B, +                                   SDPatternOperator opnode16B, +                                   Instruction INST8B, +                                   Instruction INST16B> { +  def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$Rn, VPR128:$Rm)>; +} + +// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN +defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>; +defm : Neon_bitwise2V_patterns<or,  or,  ORRvvv_8B, ORRvvv_16B>; +defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>; +defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>; +defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>; + +//   Vector Bitwise Select +def BSLvvv_8B  : NeonI_3VSame_Constraint_impl<"bsl", ".8b",  VPR64, v8i8, +                                              0b0, 0b1, 0b01, 0b00011, Neon_bsl>; + +def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, +                                              0b1, 0b1, 0b01, 0b00011, Neon_bsl>; + +multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode, +                                   Instruction INST8B, +                                   Instruction INST16B> { +  // Disassociate type from instruction definition +  def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + +  // Allow to match BSL instruction pattern with non-constant operand +  def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd), +                    (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), +          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd), +                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), +          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd), +                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), +          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd), +                     (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), +          (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v16i8 (or (and 
VPR128:$Rn, VPR128:$Rd), +                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), +          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd), +                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), +          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd), +                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), +          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd), +                     (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), +          (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; + +  // Allow to match llvm.arm.* intrinsics. +  def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src), +                    (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src), +                    (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src), +                    (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src), +                    (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src), +                    (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), +            (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; +  def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src), +                    (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src), +                    (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src), +                    (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src), +                    (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src), +                    (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +  def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src), +                    (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), +            (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +} + +// Additional patterns for bitwise instruction BSL +defm: Neon_bitwise3V_patterns<Neon_bsl, BSLvvv_8B, BSLvvv_16B>; + +def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), +                           (Neon_bsl node:$src, node:$Rn, node:$Rm), +                           [{ (void)N; return false; }]>; + +// Vector Bitwise Insert if True + +def BITvvv_8B  : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64,   v8i8, +                   0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>; +def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8, +                   0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>; + +// Vector Bitwise Insert if False + +def BIFvvv_8B  : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64,  v8i8, +                                0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>; +def BIFvvv_16B 
: NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8, +                                0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>; + +// Vector Absolute Difference and Accumulate (Signed, Unsigned) + +def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                       (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>; +def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), +                       (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>; + +// Vector Absolute Difference and Accumulate (Unsigned) +def UABAvvv_8B :  NeonI_3VSame_Constraint_impl<"uaba", ".8b",  VPR64,  v8i8, +                    0b0, 0b1, 0b00, 0b01111, Neon_uaba>; +def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8, +                    0b1, 0b1, 0b00, 0b01111, Neon_uaba>; +def UABAvvv_4H :  NeonI_3VSame_Constraint_impl<"uaba", ".4h",  VPR64,  v4i16, +                    0b0, 0b1, 0b01, 0b01111, Neon_uaba>; +def UABAvvv_8H :  NeonI_3VSame_Constraint_impl<"uaba", ".8h",  VPR128, v8i16, +                    0b1, 0b1, 0b01, 0b01111, Neon_uaba>; +def UABAvvv_2S :  NeonI_3VSame_Constraint_impl<"uaba", ".2s",  VPR64,  v2i32, +                    0b0, 0b1, 0b10, 0b01111, Neon_uaba>; +def UABAvvv_4S :  NeonI_3VSame_Constraint_impl<"uaba", ".4s",  VPR128, v4i32, +                    0b1, 0b1, 0b10, 0b01111, Neon_uaba>; + +// Vector Absolute Difference and Accumulate (Signed) +def SABAvvv_8B :  NeonI_3VSame_Constraint_impl<"saba", ".8b",  VPR64,  v8i8, +                    0b0, 0b0, 0b00, 0b01111, Neon_saba>; +def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8, +                    0b1, 0b0, 0b00, 0b01111, Neon_saba>; +def SABAvvv_4H :  NeonI_3VSame_Constraint_impl<"saba", ".4h",  VPR64,  v4i16, +                    0b0, 0b0, 0b01, 0b01111, Neon_saba>; +def SABAvvv_8H :  NeonI_3VSame_Constraint_impl<"saba", ".8h",  VPR128, v8i16, +                    0b1, 0b0, 0b01, 0b01111, Neon_saba>; +def SABAvvv_2S :  NeonI_3VSame_Constraint_impl<"saba", ".2s",  VPR64,  v2i32, +                    0b0, 0b0, 0b10, 0b01111, Neon_saba>; +def SABAvvv_4S :  NeonI_3VSame_Constraint_impl<"saba", ".4s",  VPR128, v4i32, +                    0b1, 0b0, 0b10, 0b01111, Neon_saba>; + + +// Vector Absolute Difference (Signed, Unsigned) +defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>; +defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>; + +// Vector Absolute Difference (Floating Point) +defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", +                                    int_arm_neon_vabds, int_arm_neon_vabds, +                                    int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; + +// Vector Reciprocal Step (Floating Point) +defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", +                                       int_arm_neon_vrecps, int_arm_neon_vrecps, +                                       int_arm_neon_vrecps, +                                       v2f32, v4f32, v2f64, 0>; + +// Vector Reciprocal Square Root Step (Floating Point) +defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", +                                        int_arm_neon_vrsqrts, +                                        int_arm_neon_vrsqrts, +                                        int_arm_neon_vrsqrts, +                                        v2f32, v4f32, v2f64, 0>; + +// Vector Comparisons + +def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs), +                       
 (Neon_cmp node:$lhs, node:$rhs, SETEQ)>; +def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs), +                         (Neon_cmp node:$lhs, node:$rhs, SETUGE)>; +def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs), +                        (Neon_cmp node:$lhs, node:$rhs, SETGE)>; +def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs), +                        (Neon_cmp node:$lhs, node:$rhs, SETUGT)>; +def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), +                        (Neon_cmp node:$lhs, node:$rhs, SETGT)>; + +// NeonI_compare_aliases class: swaps register operands to implement +// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. +class NeonI_compare_aliases<string asmop, string asmlane, +                            Instruction inst, RegisterClass VPRC> +  : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane # +                    ", $Rm" # asmlane, +                  (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>; + +// Vector Comparisons (Integer) + +// Vector Compare Mask Equal (Integer) +let isCommutable =1 in { +defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>; +} + +// Vector Compare Mask Higher or Same (Unsigned Integer) +defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>; + +// Vector Compare Mask Greater Than or Equal (Integer) +defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>; + +// Vector Compare Mask Higher (Unsigned Integer) +defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>; + +// Vector Compare Mask Greater Than (Integer) +defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>; + +// Vector Compare Mask Bitwise Test (Integer) +defm CMTSTvvv:  NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>; + +// Vector Compare Mask Less or Same (Unsigned Integer) +// CMLS is alias for CMHS with operands reversed. +def CMLSvvv_8B  : NeonI_compare_aliases<"cmls", ".8b",  CMHSvvv_8B,  VPR64>; +def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>; +def CMLSvvv_4H  : NeonI_compare_aliases<"cmls", ".4h",  CMHSvvv_4H,  VPR64>; +def CMLSvvv_8H  : NeonI_compare_aliases<"cmls", ".8h",  CMHSvvv_8H,  VPR128>; +def CMLSvvv_2S  : NeonI_compare_aliases<"cmls", ".2s",  CMHSvvv_2S,  VPR64>; +def CMLSvvv_4S  : NeonI_compare_aliases<"cmls", ".4s",  CMHSvvv_4S,  VPR128>; +def CMLSvvv_2D  : NeonI_compare_aliases<"cmls", ".2d",  CMHSvvv_2D,  VPR128>; + +// Vector Compare Mask Less Than or Equal (Integer) +// CMLE is alias for CMGE with operands reversed. +def CMLEvvv_8B  : NeonI_compare_aliases<"cmle", ".8b",  CMGEvvv_8B,  VPR64>; +def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>; +def CMLEvvv_4H  : NeonI_compare_aliases<"cmle", ".4h",  CMGEvvv_4H,  VPR64>; +def CMLEvvv_8H  : NeonI_compare_aliases<"cmle", ".8h",  CMGEvvv_8H,  VPR128>; +def CMLEvvv_2S  : NeonI_compare_aliases<"cmle", ".2s",  CMGEvvv_2S,  VPR64>; +def CMLEvvv_4S  : NeonI_compare_aliases<"cmle", ".4s",  CMGEvvv_4S,  VPR128>; +def CMLEvvv_2D  : NeonI_compare_aliases<"cmle", ".2d",  CMGEvvv_2D,  VPR128>; + +// Vector Compare Mask Lower (Unsigned Integer) +// CMLO is alias for CMHI with operands reversed. 
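+// For example, "cmlo v0.8b, v1.8b, v2.8b" is accepted and encoded as
+// "cmhi v0.8b, v2.8b, v1.8b".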
+def CMLOvvv_8B  : NeonI_compare_aliases<"cmlo", ".8b",  CMHIvvv_8B,  VPR64>; +def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>; +def CMLOvvv_4H  : NeonI_compare_aliases<"cmlo", ".4h",  CMHIvvv_4H,  VPR64>; +def CMLOvvv_8H  : NeonI_compare_aliases<"cmlo", ".8h",  CMHIvvv_8H,  VPR128>; +def CMLOvvv_2S  : NeonI_compare_aliases<"cmlo", ".2s",  CMHIvvv_2S,  VPR64>; +def CMLOvvv_4S  : NeonI_compare_aliases<"cmlo", ".4s",  CMHIvvv_4S,  VPR128>; +def CMLOvvv_2D  : NeonI_compare_aliases<"cmlo", ".2d",  CMHIvvv_2D,  VPR128>; + +// Vector Compare Mask Less Than (Integer) +// CMLT is alias for CMGT with operands reversed. +def CMLTvvv_8B  : NeonI_compare_aliases<"cmlt", ".8b",  CMGTvvv_8B,  VPR64>; +def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>; +def CMLTvvv_4H  : NeonI_compare_aliases<"cmlt", ".4h",  CMGTvvv_4H,  VPR64>; +def CMLTvvv_8H  : NeonI_compare_aliases<"cmlt", ".8h",  CMGTvvv_8H,  VPR128>; +def CMLTvvv_2S  : NeonI_compare_aliases<"cmlt", ".2s",  CMGTvvv_2S,  VPR64>; +def CMLTvvv_4S  : NeonI_compare_aliases<"cmlt", ".4s",  CMGTvvv_4S,  VPR128>; +def CMLTvvv_2D  : NeonI_compare_aliases<"cmlt", ".2d",  CMGTvvv_2D,  VPR128>; + + +def neon_uimm0_asmoperand : AsmOperandClass +{ +  let Name = "UImm0"; +  let PredicateMethod = "isUImm<0>"; +  let RenderMethod = "addImmOperands"; +} + +def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> { +  let ParserMatchClass = neon_uimm0_asmoperand; +  let PrintMethod = "printNeonUImm0Operand"; + +} + +multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC> +{ +  def _8B :  NeonI_2VMisc<0b0, u, 0b00, opcode, +             (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), +             asmop # "\t$Rd.8b, $Rn.8b, $Imm", +             [(set (v8i8 VPR64:$Rd), +                (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], +             NoItinerary>; + +  def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, +             (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), +             asmop # "\t$Rd.16b, $Rn.16b, $Imm", +             [(set (v16i8 VPR128:$Rd), +                (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], +             NoItinerary>; + +  def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, +            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), +            asmop # "\t$Rd.4h, $Rn.4h, $Imm", +            [(set (v4i16 VPR64:$Rd), +               (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], +            NoItinerary>; + +  def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, +            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), +            asmop # "\t$Rd.8h, $Rn.8h, $Imm", +            [(set (v8i16 VPR128:$Rd), +               (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], +            NoItinerary>; + +  def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, +            (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), +            asmop # "\t$Rd.2s, $Rn.2s, $Imm", +            [(set (v2i32 VPR64:$Rd), +               (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], +            NoItinerary>; + +  def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, +            (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), +            asmop # "\t$Rd.4s, $Rn.4s, $Imm", +            [(set (v4i32 VPR128:$Rd), +               (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], +            NoItinerary>; + +  def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, +            (outs VPR128:$Rd), (ins VPR128:$Rn, 
neon_uimm0:$Imm), +            asmop # "\t$Rd.2d, $Rn.2d, $Imm", +            [(set (v2i64 VPR128:$Rd), +               (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], +            NoItinerary>; +} + +// Vector Compare Mask Equal to Zero (Integer) +defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>; + +// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer) +defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>; + +// Vector Compare Mask Greater Than Zero (Signed Integer) +defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>; + +// Vector Compare Mask Less Than or Equal To Zero (Signed Integer) +defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>; + +// Vector Compare Mask Less Than Zero (Signed Integer) +defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; + +// Vector Comparisons (Floating Point) + +// Vector Compare Mask Equal (Floating Point) +let isCommutable =1 in { +defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, +                                      Neon_cmeq, Neon_cmeq, +                                      v2i32, v4i32, v2i64, 0>; +} + +// Vector Compare Mask Greater Than Or Equal (Floating Point) +defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, +                                      Neon_cmge, Neon_cmge, +                                      v2i32, v4i32, v2i64, 0>; + +// Vector Compare Mask Greater Than (Floating Point) +defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, +                                      Neon_cmgt, Neon_cmgt, +                                      v2i32, v4i32, v2i64, 0>; + +// Vector Compare Mask Less Than Or Equal (Floating Point) +// FCMLE is alias for FCMGE with operands reversed. +def FCMLEvvv_2S  : NeonI_compare_aliases<"fcmle", ".2s",  FCMGEvvv_2S,  VPR64>; +def FCMLEvvv_4S  : NeonI_compare_aliases<"fcmle", ".4s",  FCMGEvvv_4S,  VPR128>; +def FCMLEvvv_2D  : NeonI_compare_aliases<"fcmle", ".2d",  FCMGEvvv_2D,  VPR128>; + +// Vector Compare Mask Less Than (Floating Point) +// FCMLT is alias for FCMGT with operands reversed. 
+def FCMLTvvv_2S  : NeonI_compare_aliases<"fcmlt", ".2s",  FCMGTvvv_2S,  VPR64>; +def FCMLTvvv_4S  : NeonI_compare_aliases<"fcmlt", ".4s",  FCMGTvvv_4S,  VPR128>; +def FCMLTvvv_2D  : NeonI_compare_aliases<"fcmlt", ".2d",  FCMGTvvv_2D,  VPR128>; + + +multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode, +                              string asmop, CondCode CC> +{ +  def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, +            (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm), +            asmop # "\t$Rd.2s, $Rn.2s, $FPImm", +            [(set (v2i32 VPR64:$Rd), +               (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))], +            NoItinerary>; + +  def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, +            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), +            asmop # "\t$Rd.4s, $Rn.4s, $FPImm", +            [(set (v4i32 VPR128:$Rd), +               (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], +            NoItinerary>; + +  def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, +            (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), +            asmop # "\t$Rd.2d, $Rn.2d, $FPImm", +            [(set (v2i64 VPR128:$Rd), +               (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], +            NoItinerary>; +} + +// Vector Compare Mask Equal to Zero (Floating Point) +defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; + +// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) +defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; + +// Vector Compare Mask Greater Than Zero (Floating Point) +defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; + +// Vector Compare Mask Less Than or Equal To Zero (Floating Point) +defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; + +// Vector Compare Mask Less Than Zero (Floating Point) +defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; + +// Vector Absolute Comparisons (Floating Point) + +// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) +defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", +                                      int_arm_neon_vacged, int_arm_neon_vacgeq, +                                      int_aarch64_neon_vacgeq, +                                      v2i32, v4i32, v2i64, 0>; + +// Vector Absolute Compare Mask Greater Than (Floating Point) +defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", +                                      int_arm_neon_vacgtd, int_arm_neon_vacgtq, +                                      int_aarch64_neon_vacgtq, +                                      v2i32, v4i32, v2i64, 0>; + +// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) +// FACLE is alias for FACGE with operands reversed. +def FACLEvvv_2S  : NeonI_compare_aliases<"facle", ".2s",  FACGEvvv_2S,  VPR64>; +def FACLEvvv_4S  : NeonI_compare_aliases<"facle", ".4s",  FACGEvvv_4S,  VPR128>; +def FACLEvvv_2D  : NeonI_compare_aliases<"facle", ".2d",  FACGEvvv_2D,  VPR128>; + +// Vector Absolute Compare Mask Less Than (Floating Point) +// FACLT is alias for FACGT with operands reversed. 
+def FACLTvvv_2S  : NeonI_compare_aliases<"faclt", ".2s",  FACGTvvv_2S,  VPR64>;
+def FACLTvvv_4S  : NeonI_compare_aliases<"faclt", ".4s",  FACGTvvv_4S,  VPR128>;
+def FACLTvvv_2D  : NeonI_compare_aliases<"faclt", ".2d",  FACGTvvv_2D,  VPR128>;
+
+// Vector halving add (Integer Signed, Unsigned)
+defm SHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
+                                        int_arm_neon_vhadds, 1>;
+defm UHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
+                                        int_arm_neon_vhaddu, 1>;
+
+// Vector halving sub (Integer Signed, Unsigned)
+defm SHSUBvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
+                                        int_arm_neon_vhsubs, 0>;
+defm UHSUBvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
+                                        int_arm_neon_vhsubu, 0>;
+
+// Vector rounding halving add (Integer Signed, Unsigned)
+defm SRHADDvvv :  NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
+                                         int_arm_neon_vrhadds, 1>;
+defm URHADDvvv :  NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
+                                         int_arm_neon_vrhaddu, 1>;
+
+// Vector Saturating add (Integer Signed, Unsigned)
+defm SQADDvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
+                   int_arm_neon_vqadds, 1>;
+defm UQADDvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
+                   int_arm_neon_vqaddu, 1>;
+
+// Vector Saturating sub (Integer Signed, Unsigned)
+defm SQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
+                   int_arm_neon_vqsubs, 1>;
+defm UQSUBvvv :  NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
+                   int_arm_neon_vqsubu, 1>;
+
+// Vector Shift Left (Signed and Unsigned Integer)
+defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
+                 int_arm_neon_vshifts, 1>;
+defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
+                 int_arm_neon_vshiftu, 1>;
+
+// Vector Saturating Shift Left (Signed and Unsigned Integer)
+defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
+                  int_arm_neon_vqshifts, 1>;
+defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
+                  int_arm_neon_vqshiftu, 1>;
+
+// Vector Rounding Shift Left (Signed and Unsigned Integer)
+defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
+                  int_arm_neon_vrshifts, 1>;
+defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
+                  int_arm_neon_vrshiftu, 1>;
+
+// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
+defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
+                   int_arm_neon_vqrshifts, 1>;
+defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
+                   int_arm_neon_vqrshiftu, 1>;
+
+// Vector Maximum (Signed and Unsigned Integer)
+defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
+defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
+
+// Vector Minimum (Signed and Unsigned Integer)
+defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
+defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
+
+// Vector Maximum (Floating Point)
+defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
+                                     int_arm_neon_vmaxs, int_arm_neon_vmaxs,
+                                     int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>;
+
+// Vector Minimum (Floating Point)
+defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
+                                     int_arm_neon_vmins, int_arm_neon_vmins,
+                                     int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>;
+
+// Vector maxNum (Floating Point) - prefer a number over a quiet NaN
+defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
+                                       int_aarch64_neon_vmaxnm,
+                                       int_aarch64_neon_vmaxnm,
+                                       int_aarch64_neon_vmaxnm,
+                                       v2f32, v4f32, v2f64, 1>;
+
+// Vector minNum (Floating Point) - prefer a number over a quiet NaN
+defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
+                                       int_aarch64_neon_vminnm,
+                                       int_aarch64_neon_vminnm,
+                                       int_aarch64_neon_vminnm,
+                                       v2f32, v4f32, v2f64, 1>;
+
+// Vector Maximum Pairwise (Signed and Unsigned Integer)
+defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
+defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
+
+// Vector Minimum Pairwise (Signed and Unsigned Integer)
+defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
+defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
+
+// Vector Maximum Pairwise (Floating Point)
+defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
+                                     int_arm_neon_vpmaxs, int_arm_neon_vpmaxs,
+                                     int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
+
+// Vector Minimum Pairwise (Floating Point)
+defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
+                                     int_arm_neon_vpmins, int_arm_neon_vpmins,
+                                     int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
+
+// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
+defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
+                                       int_aarch64_neon_vpmaxnm,
+                                       int_aarch64_neon_vpmaxnm,
+                                       int_aarch64_neon_vpmaxnm,
+                                       v2f32, v4f32, v2f64, 1>;
+
+// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
+defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
+                                       int_aarch64_neon_vpminnm,
+                                       int_aarch64_neon_vpminnm,
+                                       int_aarch64_neon_vpminnm,
+                                       v2f32, v4f32, v2f64, 1>;
+
+// Vector Addition Pairwise (Integer)
+defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
+
+// Vector Addition Pairwise (Floating Point)
+defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
+                                       int_arm_neon_vpadd,
+                                       int_arm_neon_vpadd,
+                                       int_arm_neon_vpadd,
+                                       v2f32, v4f32, v2f64, 1>;
+
+// Vector Saturating Doubling Multiply High
+defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
+                    int_arm_neon_vqdmulh, 1>;
+
+// Vector Saturating Rounding Doubling Multiply High
+defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
+                     int_arm_neon_vqrdmulh, 1>;
+
+// Vector Multiply Extended (Floating Point)
+defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
+                                      int_aarch64_neon_vmulx,
+                                      int_aarch64_neon_vmulx,
+                                      int_aarch64_neon_vmulx,
+                                      v2f32, v4f32, v2f64, 1>;
+
+// Vector Immediate Instructions
+
+multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
+{
+  def _asmoperand : AsmOperandClass
+    {
+      let Name = "NeonMovImmShift" # PREFIX;
+      let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
+      let PredicateMethod = "isNeonMovImmShift" # PREFIX;
+    }
+}
+
+// Definition of vector immediate shift operands
+
+// The selectable use-cases extract the shift operation
+// information from the OpCmode fields encoded in the immediate.
+def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
+  uint64_t OpCmode = N->getZExtValue();
+  unsigned ShiftImm;
+  unsigned ShiftOnesIn;
+  unsigned HasShift =
+    A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
+  if (!HasShift) return SDValue();
+  return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
+}]>;
+
+// Vector immediate shift operands which accept LSL and MSL
+// shift operators with shift value in the range of 0, 8, 16, 24 (LSL),
+// or 0, 8 (LSLH) or 8, 16 (MSL).
+defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
+defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
+// LSLH restricts the shift amount to 0 or 8 out of 0, 8, 16, 24
+defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
+
+multiclass neon_mov_imm_shift_operands<string PREFIX,
+                                       string HALF, string ISHALF, code pred>
+{
+   def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
+    {
+      let PrintMethod =
+        "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
+      let DecoderMethod =
+        "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
+      let ParserMatchClass =
+        !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
+    }
+}
+
+defm neon_mov_imm_LSL  : neon_mov_imm_shift_operands<"LSL", "", "false", [{
+  unsigned ShiftImm;
+  unsigned ShiftOnesIn;
+  unsigned HasShift =
+    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+  return (HasShift && !ShiftOnesIn);
+}]>;
+
+defm neon_mov_imm_MSL  : neon_mov_imm_shift_operands<"MSL", "", "false", [{
+  unsigned ShiftImm;
+  unsigned ShiftOnesIn;
+  unsigned HasShift =
+    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+  return (HasShift && ShiftOnesIn);
+}]>;
+
+defm neon_mov_imm_LSLH  : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
+  unsigned ShiftImm;
+  unsigned ShiftOnesIn;
+  unsigned HasShift =
+    A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+  return (HasShift && !ShiftOnesIn);
+}]>;
+
+def neon_uimm8_asmoperand : AsmOperandClass
+{
+  let Name = "UImm8";
+  let PredicateMethod = "isUImm<8>";
+  let RenderMethod = "addImmOperands";
+}
+
+def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
+  let ParserMatchClass = neon_uimm8_asmoperand;
+  let PrintMethod = "printNeonUImm8Operand";
+}
+
+def neon_uimm64_mask_asmoperand : AsmOperandClass
+{
+  let Name = "NeonUImm64Mask";
+  let PredicateMethod = "isNeonUImm64Mask";
+  let RenderMethod = "addNeonUImm64MaskOperands";
+}
+
+// A 64-bit bytemask, in which each byte is either 0x00 or 0xff, is
+// encoded as an unsigned 8-bit MCOperand value.
+def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
+  let ParserMatchClass = neon_uimm64_mask_asmoperand;
+  let PrintMethod = "printNeonUImm64MaskOperand";
+}
+
+multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
+                                   SDPatternOperator opnode>
+{
+    // shift zeros, per word
+    def _2S  : NeonI_1VModImm<0b0, op,
+                              (outs VPR64:$Rd),
+                              (ins neon_uimm8:$Imm,
+                                neon_mov_imm_LSL_operand:$Simm),
+                              !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+                              [(set (v2i32 VPR64:$Rd),
+                                 (v2i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSL_operand:$Simm))))],
+                              NoItinerary> {
+       bits<2> Simm;
+       let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
+     }
+
+    def _4S  : NeonI_1VModImm<0b1, op,
+                              (outs VPR128:$Rd),
+                              (ins neon_uimm8:$Imm,
+                                neon_mov_imm_LSL_operand:$Simm),
+                              !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+                              [(set (v4i32 VPR128:$Rd),
+                                 (v4i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSL_operand:$Simm))))],
+                              NoItinerary> {
+      bits<2> Simm;
+      let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
+    }
+
+    // shift zeros, per halfword
+    def _4H  : NeonI_1VModImm<0b0, op,
+                              (outs VPR64:$Rd),
+                              (ins neon_uimm8:$Imm,
+                                neon_mov_imm_LSLH_operand:$Simm),
+                              !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+                              [(set (v4i16 VPR64:$Rd),
+                                 (v4i16 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSLH_operand:$Simm))))],
+                              NoItinerary> {
+      bit  Simm;
+      let cmode = {0b1, 0b0, Simm, 0b0};
+    }
+
+    def _8H  : NeonI_1VModImm<0b1, op,
+                              (outs VPR128:$Rd),
+                              (ins neon_uimm8:$Imm,
+                                neon_mov_imm_LSLH_operand:$Simm),
+                              !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+                              [(set (v8i16 VPR128:$Rd),
+                                 (v8i16 (opnode (timm:$Imm),
+                                   (neon_mov_imm_LSLH_operand:$Simm))))],
+                              NoItinerary> {
+      bit Simm;
+      let cmode = {0b1, 0b0, Simm, 0b0};
+     }
+}
+
+multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
+                                                   SDPatternOperator opnode,
+                                                   SDPatternOperator neonopnode>
+{
+  let Constraints = "$src = $Rd" in {
+    // shift zeros, per word
+    def _2S  : NeonI_1VModImm<0b0, op,
+                 (outs VPR64:$Rd),
+                 (ins VPR64:$src, neon_uimm8:$Imm,
+                   neon_mov_imm_LSL_operand:$Simm),
+                 !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+                 [(set (v2i32 VPR64:$Rd),
+                    (v2i32 (opnode (v2i32 VPR64:$src),
+                      (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm,
+                        neon_mov_imm_LSL_operand:$Simm)))))))],
+                 NoItinerary> {
+      bits<2> Simm;
+      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
+    }
+
+    def _4S  : NeonI_1VModImm<0b1, op,
+                 (outs VPR128:$Rd),
+                 (ins VPR128:$src, neon_uimm8:$Imm,
+                   neon_mov_imm_LSL_operand:$Simm),
+                 !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+                 [(set (v4i32 VPR128:$Rd),
+                    (v4i32 (opnode (v4i32 VPR128:$src),
+                      (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm,
+                        neon_mov_imm_LSL_operand:$Simm)))))))],
+                 NoItinerary> {
+      bits<2> Simm;
+      let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
+    }
+
+    // shift zeros, per halfword
+    def _4H  : NeonI_1VModImm<0b0, op,
+                 (outs VPR64:$Rd),
+                 (ins VPR64:$src, neon_uimm8:$Imm,
+                   neon_mov_imm_LSLH_operand:$Simm),
+                 !strconcat(asmop, " $Rd.4h, $Imm$Simm"),
+                 [(set (v4i16 VPR64:$Rd),
+                    (v4i16 (opnode (v4i16 VPR64:$src),
+                       (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm,
+                          neon_mov_imm_LSL_operand:$Simm)))))))],
+                 NoItinerary> {
+      bit  Simm;
+      let cmode = {0b1, 0b0, Simm, 0b1};
+    }
+
+    def _8H  : NeonI_1VModImm<0b1, op,
+                 (outs VPR128:$Rd),
+                 (ins VPR128:$src, neon_uimm8:$Imm,
+                   neon_mov_imm_LSLH_operand:$Simm),
+                 !strconcat(asmop, " $Rd.8h, $Imm$Simm"),
+                 [(set (v8i16 VPR128:$Rd),
+                    (v8i16 (opnode (v8i16 VPR128:$src),
+                      (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm,
+                        neon_mov_imm_LSL_operand:$Simm)))))))],
+                 NoItinerary> {
+      bit Simm;
+      let cmode = {0b1, 0b0, Simm, 0b1};
+    }
+  }
+}
+
+multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
+                                   SDPatternOperator opnode>
+{
+    // shift ones, per word
+    def _2S  : NeonI_1VModImm<0b0, op,
+                             (outs VPR64:$Rd),
+                             (ins neon_uimm8:$Imm,
+                               neon_mov_imm_MSL_operand:$Simm),
+                             !strconcat(asmop, " $Rd.2s, $Imm$Simm"),
+                              [(set (v2i32 VPR64:$Rd),
+                                 (v2i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_MSL_operand:$Simm))))],
+                             NoItinerary> {
+       bit Simm;
+       let cmode = {0b1, 0b1, 0b0, Simm};
+     }
+
+   def _4S  : NeonI_1VModImm<0b1, op,
+                              (outs VPR128:$Rd),
+                              (ins neon_uimm8:$Imm,
+                                neon_mov_imm_MSL_operand:$Simm),
+                              !strconcat(asmop, " $Rd.4s, $Imm$Simm"),
+                              [(set (v4i32 VPR128:$Rd),
+                                 (v4i32 (opnode (timm:$Imm),
+                                   (neon_mov_imm_MSL_operand:$Simm))))],
+                              NoItinerary> {
+     bit Simm;
+     let cmode = {0b1, 0b1, 0b0, Simm};
+   }
+}
+
+// Vector Move Immediate Shifted
+let isReMaterializable = 1 in {
+defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
+}
+
+// Vector Move Inverted Immediate Shifted
+let isReMaterializable = 1 in {
+defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
+}
+
+// Vector Bitwise Bit Clear (AND NOT) - immediate
+let isReMaterializable = 1 in {
+defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
+                                                         and, Neon_mvni>;
+}
+
+// Vector Bitwise OR - immediate
+
+let isReMaterializable = 1 in {
+defm ORRvi_lsl   : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
+                                                           or, Neon_movi>;
+}
+
+// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
+// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
+// BIC immediate instruction selection requires additional patterns to
+// transform Neon_movi operands into BIC immediate operands.
+
+def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
+  uint64_t OpCmode = N->getZExtValue();
+  unsigned ShiftImm;
+  unsigned ShiftOnesIn;
+  (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
+  // LSLH restricts the shift amount to 0 or 8, which are encoded as 0 and 1.
+  // Transform encoded shift amount 0 to 1 and 1 to 0.
+  return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
+}]>;
+
+def neon_mov_imm_LSLH_transform_operand
+  : ImmLeaf<i32, [{
+    unsigned ShiftImm;
+    unsigned ShiftOnesIn;
+    unsigned HasShift =
+      A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
+    return (HasShift && !ShiftOnesIn); }],
+  neon_mov_imm_LSLH_transform_XFORM>;
+
+// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8)
+// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00)
+def : Pat<(v4i16 (and VPR64:$src,
+            (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
+          (BICvi_lsl_4H VPR64:$src, 0,
+            neon_mov_imm_LSLH_transform_operand:$Simm)>;
+
+// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0x00, LSL 8)
+// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00)
+def : Pat<(v8i16 (and VPR128:$src,
+            (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))),
+          (BICvi_lsl_8H VPR128:$src, 0,
+            neon_mov_imm_LSLH_transform_operand:$Simm)>;
+
+
+multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
+                                   SDPatternOperator neonopnode,
+                                   Instruction INST4H,
+                                   Instruction INST8H> {
+  def : Pat<(v8i8 (opnode VPR64:$src,
+                    (bitconvert(v4i16 (neonopnode timm:$Imm,
+                      neon_mov_imm_LSLH_operand:$Simm))))),
+            (INST4H VPR64:$src, neon_uimm8:$Imm,
+              neon_mov_imm_LSLH_operand:$Simm)>;
+  def : Pat<(v1i64 (opnode VPR64:$src,
+                  (bitconvert(v4i16 (neonopnode timm:$Imm,
+                    neon_mov_imm_LSLH_operand:$Simm))))),
+          (INST4H VPR64:$src, neon_uimm8:$Imm,
+            neon_mov_imm_LSLH_operand:$Simm)>;
+
+  def : Pat<(v16i8 (opnode VPR128:$src,
+                   (bitconvert(v8i16 (neonopnode timm:$Imm,
+                     neon_mov_imm_LSLH_operand:$Simm))))),
+          (INST8H VPR128:$src, neon_uimm8:$Imm,
+            neon_mov_imm_LSLH_operand:$Simm)>;
+  def : Pat<(v4i32 (opnode VPR128:$src,
+                   (bitconvert(v8i16 (neonopnode timm:$Imm,
+                     neon_mov_imm_LSLH_operand:$Simm))))),
+          (INST8H VPR128:$src, neon_uimm8:$Imm,
+            neon_mov_imm_LSLH_operand:$Simm)>;
+  def : Pat<(v2i64 (opnode VPR128:$src,
+                   (bitconvert(v8i16 (neonopnode timm:$Imm,
+                     neon_mov_imm_LSLH_operand:$Simm))))),
+          (INST8H VPR128:$src, neon_uimm8:$Imm,
+            neon_mov_imm_LSLH_operand:$Simm)>;
+}
+
+// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
+defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H>;
+
+// Additional patterns for Vector Bitwise OR - immediate
+defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H>;
+
+
+// Vector Move Immediate Masked
+let isReMaterializable = 1 in {
+defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
+}
+
+// Vector Move Inverted Immediate Masked
+let isReMaterializable = 1 in {
+defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
+}
+
+class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
+                                Instruction inst, RegisterClass VPRC>
+  : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
+                        (inst VPRC:$Rd, neon_uimm8:$Imm,  0), 0b0>;
+
+// Aliases for Vector Move Immediate Shifted
+def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Move Inverted Immediate Shifted
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
+def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Bitwise OR - immediate
+def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
+
+//  Vector Move Immediate - per byte
+let isReMaterializable = 1 in {
+def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
+                               (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
+                               "movi\t$Rd.8b, $Imm",
+                               [(set (v8i8 VPR64:$Rd),
+                                  (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+                                NoItinerary> {
+  let cmode = 0b1110;
+}
+
+def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
+                                (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
+                                "movi\t$Rd.16b, $Imm",
+                                [(set (v16i8 VPR128:$Rd),
+                                   (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+                                 NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
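Before the bytemask forms below, a note on the encoding: the 64-bit mask travels as an 8-bit immediate, one bit per byte, exactly as the addNeonUImm64MaskOperands and printNeonUImm64MaskOperand routines in the AsmParser/InstPrinter parts of this patch encode and decode it. A minimal round-trip sketch (illustrative C++ only, not part of the patch):

    #include <cstdint>

    // Fold each byte (must be 0x00 or 0xff) into one bit of the immediate.
    static uint8_t encodeByteMask(uint64_t V) {
      uint8_t Imm = 0;
      for (unsigned i = 0; i < 8; ++i, V >>= 8)
        Imm |= (V & 1) << i;
      return Imm;
    }

    // Expand each set bit back into a 0xff byte of the 64-bit mask.
    static uint64_t decodeByteMask(uint8_t Imm) {
      uint64_t V = 0;
      for (unsigned i = 0; i < 8; ++i)
        if ((Imm >> i) & 1)
          V |= (uint64_t)0xff << (8 * i);
      return V;
    }

    int main() {
      uint64_t M = 0x00ff00ff00ff00ffULL; // encodes as 0b01010101
      return decodeByteMask(encodeByteMask(M)) == M ? 0 : 1;
    }

+// Vector Move Immediate - bytemask, per double word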
+let isReMaterializable = 1 in {
+def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
+                               (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
+                               "movi\t$Rd.2d, $Imm",
+                               [(set (v2i64 VPR128:$Rd),
+                                  (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
+                               NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
+// Vector Move Immediate - bytemask, one doubleword
+
+let isReMaterializable = 1 in {
+def MOVIdi : NeonI_1VModImm<0b0, 0b1,
+                           (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
+                           "movi\t$Rd, $Imm",
+                           [(set (f64 FPR64:$Rd),
+                              (f64 (bitconvert
+                                (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))))],
+                           NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
+// Vector Floating Point Move Immediate
+
+class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
+                      Operand immOpType, bit q, bit op>
+  : NeonI_1VModImm<q, op,
+                   (outs VPRC:$Rd), (ins immOpType:$Imm),
+                   "fmov\t$Rd" # asmlane # ", $Imm",
+                   [(set (OpTy VPRC:$Rd),
+                      (OpTy (Neon_fmovi (timm:$Imm))))],
+                   NoItinerary> {
+     let cmode = 0b1111;
+   }
+
+let isReMaterializable = 1 in {
+def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
+def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
+def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
+}
+
+// Scalar Arithmetic
+
+class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
+  : NeonI_Scalar3Same<u, 0b11, opcode,
+                (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
+                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                [],
+                NoItinerary>;
+
+multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
+                                        string asmop, bit Commutable = 0>
+{
+  let isCommutable = Commutable in {
+    def bbb : NeonI_Scalar3Same<u, 0b00, opcode,
+                                (outs FPR8:$Rd), (ins FPR8:$Rn, FPR8:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+    def hhh : NeonI_Scalar3Same<u, 0b01, opcode,
+                                (outs FPR16:$Rd), (ins FPR16:$Rn, FPR16:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+    def sss : NeonI_Scalar3Same<u, 0b10, opcode,
+                                (outs FPR32:$Rd), (ins FPR32:$Rn, FPR32:$Rm),
+                                !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                                [],
+                                NoItinerary>;
+    def ddd : NeonI_Scalar3Same<u, 0b11, opcode,
+                               (outs FPR64:$Rd), (ins FPR64:$Rn, FPR64:$Rm),
+                               !strconcat(asmop, " $Rd, $Rn, $Rm"),
+                               [],
+                               NoItinerary>;
+  }
+}
+
+class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
+  : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
+        (SUBREG_TO_REG (i64 0),
+              (INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
+             (EXTRACT_SUBREG VPR64:$Rm, sub_64)),
+          sub_64)>;
+
+
+// Scalar Integer Add
+let isCommutable = 1 in {
+def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
+}
+
+// Scalar Integer Sub
+def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
+
+// Pattern for Scalar Integer Add and Sub with D register
+def : Neon_Scalar_D_size_patterns<add, ADDddd>;
+def : Neon_Scalar_D_size_patterns<sub, SUBddd>;
+
+// Scalar Integer Saturating Add (Signed, Unsigned)
+defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
+defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
+
+// Scalar Integer Saturating Sub (Signed, Unsigned)
+defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
+defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
+
+// Patterns for Scalar Integer Saturating Add, Sub with D register only
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqadds, SQADDddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqaddu, UQADDddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubs, SQSUBddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqsubu, UQSUBddd>;
+
+// Scalar Integer Shift Left (Signed, Unsigned)
+def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
+def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
+defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
+
+// Scalar Integer Rounding Shift Left (Signed, Unsigned)
+def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
+def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
+
+// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
+defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
+defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
+
+// Patterns for Scalar Integer Shift Left, Saturating Shift Left,
+// Rounding Shift Left and Saturating Rounding Shift Left with D register only
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
+def : Neon_Scalar_D_size_patterns<shl, SSHLddd>;
+def : Neon_Scalar_D_size_patterns<shl, USHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
+def : Neon_Scalar_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
+
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
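The bitcast patterns below are all register-identity mappings: a bitconvert between vector types of the same total width only reinterprets the lanes, so no instruction is selected while the value stays in the same register class. A C++ analogue of that semantics (illustrative only, not part of the patch):

    #include <cstdint>
    #include <cstring>

    // Reinterpret v2f32 as v1i64: the 64 bits are untouched, only the
    // lane structure changes -- which is why the patterns emit no code.
    int main() {
      float V2F32[2] = {1.0f, 2.0f};
      uint64_t V1I64;
      static_assert(sizeof(V2F32) == sizeof(V1I64), "both are 64-bit");
      std::memcpy(&V1I64, V2F32, sizeof(V1I64)); // bit pattern unchanged
      return V1I64 != 0 ? 0 : 1;
    }

+// 64-bit vector bitcasts...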
+ +def : Pat<(v1i64 (bitconvert (v8i8  VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8  VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8  VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8  VPR64:$src))), (v4i16 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v4i16  VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16  VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16  VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v8i8  (bitconvert (v4i16  VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v2i32  VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v2i32  VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32  VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8  (bitconvert (v2i32  VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v2f32  VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v2f32  VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32  VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8  (bitconvert (v2f32  VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v2f32 (bitconvert (v1i64  VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64  VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64  VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8  (bitconvert (v1i64  VPR64:$src))), (v8i8 VPR64:$src)>; + +// ..and 128-bit vector bitcasts... + +def : Pat<(v2f64 (bitconvert (v16i8  VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8  VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8  VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8  VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8  VPR128:$src))), (v8i16 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v8i16  VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16  VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16  VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16  VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16  VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v4i32  VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32  VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32  VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32  VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32  VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v4f32  VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32  VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32  VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32  VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32  VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v2i64  VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64  VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64  VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64  VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64  VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2i64 (bitconvert (v2f64  VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64  
VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64  VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64  VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64  VPR128:$src))), (v16i8 VPR128:$src)>; + + +// ...and scalar bitcasts... + +def : Pat<(f64   (bitconvert (v8i8  VPR64:$src))), +                 (f64 (EXTRACT_SUBREG (v8i8  VPR64:$src), sub_64))>; +def : Pat<(f64   (bitconvert (v4i16  VPR64:$src))), +                 (f64 (EXTRACT_SUBREG (v4i16  VPR64:$src), sub_64))>; +def : Pat<(f64   (bitconvert (v2i32  VPR64:$src))), +                 (f64 (EXTRACT_SUBREG (v2i32  VPR64:$src), sub_64))>; +def : Pat<(f64   (bitconvert (v2f32  VPR64:$src))), +                 (f64 (EXTRACT_SUBREG (v2f32  VPR64:$src), sub_64))>; +def : Pat<(f64   (bitconvert (v1i64  VPR64:$src))), +                 (f64 (EXTRACT_SUBREG (v1i64  VPR64:$src), sub_64))>; +def : Pat<(f128  (bitconvert (v16i8  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v16i8  VPR128:$src), sub_alias))>; +def : Pat<(f128  (bitconvert (v8i16  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v8i16  VPR128:$src), sub_alias))>; +def : Pat<(f128  (bitconvert (v4i32  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v4i32  VPR128:$src), sub_alias))>; +def : Pat<(f128  (bitconvert (v2i64  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v2i64  VPR128:$src), sub_alias))>; +def : Pat<(f128  (bitconvert (v4f32  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v4f32  VPR128:$src), sub_alias))>; +def : Pat<(f128  (bitconvert (v2f64  VPR128:$src))), +                 (f128 (EXTRACT_SUBREG (v2f64  VPR128:$src), sub_alias))>; + +def : Pat<(v8i8   (bitconvert (f64   FPR64:$src))), +                  (v8i8 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>; +def : Pat<(v4i16  (bitconvert (f64   FPR64:$src))), +                  (v4i16 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>; +def : Pat<(v2i32  (bitconvert (f64   FPR64:$src))), +                  (v2i32 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>; +def : Pat<(v2f32  (bitconvert (f64   FPR64:$src))), +                  (v2f32 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>; +def : Pat<(v1i64  (bitconvert (f64   FPR64:$src))), +                  (v1i64 (SUBREG_TO_REG (i64 0), (f64  FPR64:$src), sub_64))>; +def : Pat<(v16i8  (bitconvert (f128   FPR128:$src))), +                  (v16i8 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; +def : Pat<(v8i16  (bitconvert (f128   FPR128:$src))), +                  (v8i16 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; +def : Pat<(v4i32  (bitconvert (f128   FPR128:$src))), +                  (v4i32 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; +def : Pat<(v2i64  (bitconvert (f128   FPR128:$src))), +                  (v2i64 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; +def : Pat<(v4f32  (bitconvert (f128   FPR128:$src))), +                  (v4f32 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; +def : Pat<(v2f64  (bitconvert (f128   FPR128:$src))), +                  (v2f64 (SUBREG_TO_REG (i128 0), (f128  FPR128:$src), +                  sub_alias))>; diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index 3d22330afe7..7ce5ce3441e 100644 --- a/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -109,6 +109,11 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,    case MachineOperand::MO_Immediate:      MCOp = MCOperand::CreateImm(MO.getImm());      break; +  case MachineOperand::MO_FPImmediate: { +    assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported"); +    MCOp = MCOperand::CreateFPImm(0.0); +    break; +  }    case MachineOperand::MO_BlockAddress:      MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));      break; diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index cc2bb6135cc..b3a81b1dc0a 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -185,7 +185,7 @@ foreach Index = 0-31 in {  // These two classes contain the same registers, which should be reasonably  // sensible for MC and allocation purposes, but allows them to be treated  // separately for things like stack spilling. -def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64, +def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,                            (sequence "V%u", 0, 31)>;  def VPR128 : RegisterClass<"AArch64", diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index d17b7382099..d71bb4e9734 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -26,10 +26,8 @@  using namespace llvm;  AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) -  : AArch64GenSubtargetInfo(TT, CPU, FS) -  , HasNEON(true) -  , HasCrypto(true) -  , TargetTriple(TT) { +    : AArch64GenSubtargetInfo(TT, CPU, FS), HasNEON(false), HasCrypto(false), +      TargetTriple(TT) {    ParseSubtargetFeatures(CPU, FS);  } diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 2e9205fc992..35a7c8d85db 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -48,6 +48,9 @@ public:    bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }    bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } +  bool hasNEON() const { return HasNEON; } + +  bool hasCrypto() const { return HasCrypto; }  };  } // End llvm namespace diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 10a9a6a4062..43e91ac4e01 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -664,8 +664,42 @@ public:      return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;    } -  template<int MemSize>  bool isSImm7Scaled() const { -    if (!isImm()) return false; +  bool isNeonMovImmShiftLSL() const { +    if (!isShiftOrExtend()) +      return false; + +    if (ShiftExtend.ShiftType != A64SE::LSL) +      return false; + +    // Valid shift amount is 0, 8, 16 and 24. +    return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24; +  } + +  bool isNeonMovImmShiftLSLH() const { +    if (!isShiftOrExtend()) +      return false; + +    if (ShiftExtend.ShiftType != A64SE::LSL) +      return false; + +    // Valid shift amount is 0 and 8. 
+    return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8;
+  }
+
+  bool isNeonMovImmShiftMSL() const {
+    if (!isShiftOrExtend())
+      return false;
+
+    if (ShiftExtend.ShiftType != A64SE::MSL)
+      return false;
+
+    // Valid shift amount is 8 and 16.
+    return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
+  }
+
+  template <int MemSize> bool isSImm7Scaled() const {
+    if (!isImm())
+      return false;
 
     const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
     if (!CE) return false;
@@ -705,10 +739,27 @@ public:
     return isa<MCConstantExpr>(getImm());
   }
 
+  bool isNeonUImm64Mask() const {
+    if (!isImm())
+      return false;
+
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    if (!CE)
+      return false;
+
+    uint64_t Value = CE->getValue();
+
+    // i64 value with each byte being either 0x00 or 0xff.
+    for (unsigned i = 0; i < 8; ++i, Value >>= 8)
+      if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff)
+        return false;
+    return true;
+  }
+
   static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
                                           unsigned ShiftAmount,
                                           bool ImplicitAmount,
                                           SMLoc S, SMLoc E) {
     AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
     Op->ImmWithLSL.Val = Val;
     Op->ImmWithLSL.ShiftAmount = ShiftAmount;
@@ -1026,6 +1077,40 @@ public:
     Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
   }
 
+  // For vector immediate shift operands.
+  void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+    if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24)
+      llvm_unreachable("Invalid shift amount for vector immediate inst.");
+
+    // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3.
+    int64_t Imm = ShiftExtend.Amount / 8;
+    Inst.addOperand(MCOperand::CreateImm(Imm));
+  }
+
+  void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+    if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8)
+      llvm_unreachable("Invalid shift amount for vector immediate inst.");
+
+    // Encode LSLH shift amount 0, 8 as 0, 1.
+    int64_t Imm = ShiftExtend.Amount / 8;
+    Inst.addOperand(MCOperand::CreateImm(Imm));
+  }
+
+  void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+
+    if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16)
+      llvm_unreachable("Invalid shift amount for vector immediate inst.");
+
+    // Encode MSL shift amount 8, 16 as 0, 1.
+    int64_t Imm = ShiftExtend.Amount / 8 - 1;
+    Inst.addOperand(MCOperand::CreateImm(Imm));
+  }
+
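The three add*Operands methods above compress the written shift amount into the field actually encoded in the instruction; the printer and disassembler later in this patch invert the mapping. A condensed sketch (illustrative C++ only, not part of the patch):

    #include <cassert>

    // LSL on 32-bit lanes: 0/8/16/24 -> 0..3. LSLH (16-bit lanes): 0/8 -> 0/1.
    // MSL: 8/16 -> 0/1.
    static int encodeLSL(unsigned Amount) {
      assert(Amount % 8 == 0 && Amount <= 24);
      return Amount / 8;
    }
    static int encodeMSL(unsigned Amount) {
      assert(Amount == 8 || Amount == 16);
      return Amount / 8 - 1;
    }

    int main() {
      // "movi v0.4s, #0xab, lsl #16" carries 2 in the encoded field;
      // "movi v0.2s, #0xab, msl #16" carries 1.
      return encodeLSL(16) == 2 && encodeMSL(16) == 1 ? 0 : 1;
    }

  // For the extend in load-store (register offset) instructions.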
template<unsigned MemSize>    void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { @@ -1065,6 +1150,20 @@ public:      Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));    } + +  void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const { +    assert(N == 1 && "Invalid number of operands!"); + +    // A bit from each byte in the constant forms the encoded immediate +    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); +    uint64_t Value = CE->getValue(); + +    unsigned Imm = 0; +    for (unsigned i = 0; i < 8; ++i, Value >>= 8) { +      Imm |= (Value & 1) << i; +    } +    Inst.addOperand(MCOperand::CreateImm(Imm)); +  }  };  } // end anonymous namespace. @@ -1660,20 +1759,21 @@ AArch64AsmParser::ParseShiftExtend(    std::string LowerID = IDVal.lower();    A64SE::ShiftExtSpecifiers Spec = -    StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) -      .Case("lsl", A64SE::LSL) -      .Case("lsr", A64SE::LSR) -      .Case("asr", A64SE::ASR) -      .Case("ror", A64SE::ROR) -      .Case("uxtb", A64SE::UXTB) -      .Case("uxth", A64SE::UXTH) -      .Case("uxtw", A64SE::UXTW) -      .Case("uxtx", A64SE::UXTX) -      .Case("sxtb", A64SE::SXTB) -      .Case("sxth", A64SE::SXTH) -      .Case("sxtw", A64SE::SXTW) -      .Case("sxtx", A64SE::SXTX) -      .Default(A64SE::Invalid); +      StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID) +        .Case("lsl", A64SE::LSL) +	.Case("msl", A64SE::MSL) +	.Case("lsr", A64SE::LSR) +	.Case("asr", A64SE::ASR) +	.Case("ror", A64SE::ROR) +	.Case("uxtb", A64SE::UXTB) +	.Case("uxth", A64SE::UXTH) +	.Case("uxtw", A64SE::UXTW) +	.Case("uxtx", A64SE::UXTX) +	.Case("sxtb", A64SE::SXTB) +	.Case("sxth", A64SE::SXTH) +	.Case("sxtw", A64SE::SXTW) +	.Case("sxtx", A64SE::SXTX) +	.Default(A64SE::Invalid);    if (Spec == A64SE::Invalid)      return MatchOperand_NoMatch; @@ -1683,8 +1783,8 @@ AArch64AsmParser::ParseShiftExtend(    S = Parser.getTok().getLoc();    Parser.Lex(); -  if (Spec != A64SE::LSL && Spec != A64SE::LSR && -      Spec != A64SE::ASR && Spec != A64SE::ROR) { +  if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR && +      Spec != A64SE::ROR && Spec != A64SE::MSL) {      // The shift amount can be omitted for the extending versions, but not real      // shifts:      //     add x0, x0, x0, uxtb @@ -2019,7 +2119,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,                   "expected compatible register or floating-point constant");    case Match_FPZero:      return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), -                 "expected floating-point constant #0.0"); +                 "expected floating-point constant #0.0 or invalid register type");    case Match_Label:      return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),                   "expected label or encodable integer pc offset"); diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 36dd7041402..a88a8e8e9e6 100644 --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -85,6 +85,9 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,  static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,                                                unsigned RegNo, uint64_t Address,                                                const void *Decoder); +static DecodeStatus 
+                                             uint64_t Address,
+                                             const void *Decoder);
 static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
                                               unsigned RegNo, uint64_t Address,
                                               const void *Decoder);
@@ -126,6 +129,10 @@ static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
                                            unsigned ShiftAmount,
                                            uint64_t Address,
                                            const void *Decoder);
+template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
+static DecodeStatus
+DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
+                             uint64_t Address, const void *Decoder);
 static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
                                             unsigned ShiftAmount,
@@ -336,9 +343,20 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }
+static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+                                             uint64_t Address,
+                                             const void *Decoder) {
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+
+  uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
+  Inst.addOperand(MCOperand::CreateReg(Register));
+  return MCDisassembler::Success;
+}
+
 static DecodeStatus
 DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
                           uint64_t Address, const void *Decoder) {
   if (RegNo > 31)
     return MCDisassembler::Fail;
@@ -799,4 +817,24 @@ extern "C" void LLVMInitializeAArch64Disassembler() {
                                          createAArch64Disassembler);
 }
+template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
+static DecodeStatus
+DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
+                             uint64_t Address, const void *Decoder) {
+  bool IsLSL = false;
+  if (Ext == A64SE::LSL)
+    IsLSL = true;
+  else if (Ext != A64SE::MSL)
+    return MCDisassembler::Fail;
+
+  // MSL and LSLH accept an encoded shift amount of 0 or 1.
+  if ((!IsLSL || IsHalf) && ShiftAmount != 0 && ShiftAmount != 1)
+    return MCDisassembler::Fail;
+
+  // LSL accepts an encoded shift amount of 0, 1, 2 or 3.
+  if (IsLSL && ShiftAmount > 3)
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
+  return MCDisassembler::Success;
+}
diff --git a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 82ce80c8b1a..b6243310d58 100644
--- a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -406,3 +406,84 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
   printAnnotation(O, Annot);
 }
+
+template <A64SE::ShiftExtSpecifiers Ext, bool isHalf>
+void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI,
+                                                     unsigned OpNum,
+                                                     raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+
+  assert(MO.isImm() &&
+         "Immediate operand required for Neon vector immediate inst.");
+
+  bool IsLSL = false;
+  if (Ext == A64SE::LSL)
+    IsLSL = true;
+  else if (Ext != A64SE::MSL)
+    llvm_unreachable("Invalid shift specifier in movi instruction");
+
+  int64_t Imm = MO.getImm();
+
+  // MSL and LSLH accept an encoded shift amount of 0 or 1.
+  if ((!IsLSL || isHalf) && Imm != 0 && Imm != 1)
+    llvm_unreachable("Invalid shift amount in movi instruction");
+
+  // LSL accepts an encoded shift amount of 0, 1, 2 or 3.
+  if (IsLSL && (Imm < 0 || Imm > 3))
+    llvm_unreachable("Invalid shift amount in movi instruction");
+
+  // Print the shift amount as a multiple of 8; the MSL encodings 0 and 1
+  // print as 8 and 16.
+  if (!IsLSL)
+    Imm++;
+  Imm *= 8;
+
+  // LSL #0 is not printed.
+  if (IsLSL) {
+    if (Imm == 0)
+      return;
+    O << ", lsl";
+  } else
+    O << ", msl";
+
+  O << " #" << Imm;
+}
+
+void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  O << "#0x0";
+}
+
+void AArch64InstPrinter::printNeonUImm8Operand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &O) {
+  const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+  assert(MOUImm.isImm() &&
+         "Immediate operand required for Neon vector immediate inst.");
+
+  unsigned Imm = MOUImm.getImm();
+
+  O << "#0x";
+  O.write_hex(Imm);
+}
+
+void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    raw_ostream &O) {
+  const MCOperand &MOUImm8 = MI->getOperand(OpNum);
+
+  assert(MOUImm8.isImm() &&
+         "Immediate operand required for Neon vector immediate bytemask inst.");
+
+  uint32_t UImm8 = MOUImm8.getImm();
+  uint64_t Mask = 0;
+
+  // Each bit of the 8-bit immediate selects a 0xff or 0x00 byte of the
+  // 64-bit mask.
+  for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+    if ((UImm8 >> ByteNum) & 1)
+      Mask |= (uint64_t)0xff << (8 * ByteNum);
+  }
+
+  O << "#0x";
+  O.write_hex(Mask);
+}
diff --git a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 639fa869c01..f7439bec668 100644
--- a/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -164,9 +164,14 @@ public:
     return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
   }
-
+  template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
+  void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
+                                   raw_ostream &O);
+  void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNeonUImm8Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+  void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
+                                  raw_ostream &O);
 };
-
 }

 #endif
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 48d48190fde..58fc95c2eaf 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -40,7 +40,7 @@ MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
                                                           StringRef CPU,
                                                           StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
-  InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+  InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
   return X;
 }
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 79865f6aa59..2a97cd63256 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -1105,3 +1105,69 @@ bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
   return isMOVNImm(RegWidth, Value, UImm16, Shift);
 }
+
+// decodeNeonModShiftImm - Decode a NEON OpCmode value into the shift amount
+// and the shift type (shift zeros or ones in), and return whether the
+// OpCmode value implies a shift operation at all.
+bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
+                                    unsigned &ShiftOnesIn) {
+  ShiftImm = 0;
+  ShiftOnesIn = false;
+  bool HasShift = true;
+
+  if (OpCmode == 0xe) {
+    // movi byte
+    HasShift = false;
+  } else if (OpCmode == 0x1e) {
+    // movi 64-bit bytemask
+    HasShift = false;
+  } else if ((OpCmode & 0xc) == 0x8) {
+    // shift zeros, per halfword
+    ShiftImm = ((OpCmode & 0x2) >> 1);
+  } else if ((OpCmode & 0x8) == 0) {
+    // shift zeros, per word
+    ShiftImm = ((OpCmode & 0x6) >> 1);
+  } else if ((OpCmode & 0xe) == 0xc) {
+    // shift ones, per word
+    ShiftOnesIn = true;
+    ShiftImm = (OpCmode & 0x1);
+  } else {
+    // per byte, per bytemask
+    llvm_unreachable("Unsupported Neon modified immediate");
+  }
+
+  return HasShift;
+}
+
+// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values
+// into the element value and the element size in bits.
+uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode,
+                                   unsigned &EltBits) {
+  uint64_t DecodedVal = Val;
+  EltBits = 0;
+
+  if (OpCmode == 0xe) {
+    // movi byte
+    EltBits = 8;
+  } else if (OpCmode == 0x1e) {
+    // movi 64-bit bytemask
+    DecodedVal = 0;
+    for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+      if ((Val >> ByteNum) & 1)
+        DecodedVal |= (uint64_t)0xff << (8 * ByteNum);
+    }
+    EltBits = 64;
+  } else if ((OpCmode & 0xc) == 0x8) {
+    // shift zeros, per halfword
+    EltBits = 16;
+  } else if ((OpCmode & 0x8) == 0) {
+    // shift zeros, per word
+    EltBits = 32;
+  } else if ((OpCmode & 0xe) == 0xc) {
+    // shift ones, per word
+    EltBits = 32;
+  } else {
+    llvm_unreachable("Unsupported Neon modified immediate");
+  }
+  return DecodedVal;
+}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 9a1ca6127ae..e675efc9d9a 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -289,6 +289,7 @@ namespace A64SE {
     enum ShiftExtSpecifiers {
         Invalid = -1,
         LSL,
+        MSL,
         LSR,
         ASR,
         ROR,
@@ -1068,7 +1069,10 @@ namespace A64Imms {
   // MOVN but *not* with a MOVZ (because that would take priority).
   bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);

-}
+  uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits);
+  bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
+                             unsigned &ShiftOnesIn);
+}

 } // end namespace llvm;
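Note: the 64-bit "bytemask" immediate handled above by isNeonUImm64Mask, addNeonUImm64MaskOperands, printNeonUImm64MaskOperand and the 0x1e case of decodeNeonModImm packs one bit per byte. Here is a minimal standalone sketch of that round trip; it is plain C++ with illustrative function names, not code from this patch:

#include <cassert>
#include <cstdint>

// Compress a 64-bit value whose bytes are all 0x00 or 0xff into an 8-bit
// immediate, taking bit 0 of each byte (enough once the value is validated).
static unsigned encodeByteMask(uint64_t Value) {
  unsigned Imm = 0;
  for (unsigned i = 0; i < 8; ++i, Value >>= 8)
    Imm |= (Value & 1) << i;
  return Imm;
}

// Expand the 8-bit immediate back into the 64-bit mask: each set bit
// selects a 0xff byte.
static uint64_t decodeByteMask(unsigned Imm) {
  uint64_t Mask = 0;
  for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum)
    if ((Imm >> ByteNum) & 1)
      Mask |= (uint64_t)0xff << (8 * ByteNum);
  return Mask;
}

int main() {
  uint64_t V = 0xff00ff0000ffff00ULL;
  assert(encodeByteMask(V) == 0xa6);                 // bits 7, 5, 2, 1 set
  assert(decodeByteMask(encodeByteMask(V)) == V);    // lossless round trip
  return 0;
}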

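Note: the three addNeonMovImmShift*Operands validators in the asm parser and DecodeNeonMovImmShiftOperand in the disassembler agree on one mapping from assembly shift amounts to encoded field values. A compact sketch of that mapping, using an illustrative enum and function name rather than anything from the LLVM API:

#include <cstdint>
#include <stdexcept>

// LSL on 32-bit elements, LSL on 16-bit elements ("LSLH") and MSL each
// permit different amounts and encode them differently.
enum class MoviShift { LSL, LSLH, MSL };

static int64_t encodeMoviShift(MoviShift Kind, unsigned Amount) {
  switch (Kind) {
  case MoviShift::LSL:   // #0, #8, #16, #24 encode as 0, 1, 2, 3
    if (Amount % 8 != 0 || Amount > 24)
      throw std::invalid_argument("invalid LSL shift amount");
    return Amount / 8;
  case MoviShift::LSLH:  // #0, #8 encode as 0, 1
    if (Amount != 0 && Amount != 8)
      throw std::invalid_argument("invalid LSLH shift amount");
    return Amount / 8;
  case MoviShift::MSL:   // #8, #16 encode as 0, 1
    if (Amount != 8 && Amount != 16)
      throw std::invalid_argument("invalid MSL shift amount");
    return Amount / 8 - 1;
  }
  throw std::logic_error("unreachable");
}

// For example, "movi v0.4s, #0xab, msl #16" carries
// encodeMoviShift(MoviShift::MSL, 16) == 1 in its encoding.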

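Note: the OpCmode bit tests in decodeNeonModShiftImm are easier to audit in isolation. The following is a standalone mirror of the same dispatch, a sketch only: it assumes just the OpCmode patterns shown in the patch reach it, and it uses bool for the ones/zeros flag where the real signature uses unsigned:

#include <cassert>

static bool decodeModShift(unsigned OpCmode, unsigned &ShiftImm,
                           bool &ShiftOnesIn) {
  ShiftImm = 0;
  ShiftOnesIn = false;
  if (OpCmode == 0xe || OpCmode == 0x1e)
    return false;                       // movi byte / 64-bit bytemask: no shift
  if ((OpCmode & 0xc) == 0x8) {
    ShiftImm = (OpCmode & 0x2) >> 1;    // 16-bit elements, lsl #0 or #8
  } else if ((OpCmode & 0x8) == 0) {
    ShiftImm = (OpCmode & 0x6) >> 1;    // 32-bit elements, lsl #0..#24
  } else if ((OpCmode & 0xe) == 0xc) {
    ShiftOnesIn = true;                 // 32-bit elements, msl #8 or #16
    ShiftImm = OpCmode & 0x1;
  } else {
    assert(0 && "unsupported OpCmode"); // per-byte/bytemask handled above
  }
  return true;
}

int main() {
  unsigned Imm;
  bool Ones;
  assert(decodeModShift(0xa, Imm, Ones) && !Ones && Imm == 1);  // .4h, lsl #8
  assert(decodeModShift(0xd, Imm, Ones) && Ones && Imm == 1);   // .4s, msl #16
  assert(!decodeModShift(0x1e, Imm, Ones));                     // bytemask
  return 0;
}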