| author | Amara Emerson <aemerson@apple.com> | 2019-04-09 21:22:43 +0000 | 
|---|---|---|
| committer | Amara Emerson <aemerson@apple.com> | 2019-04-09 21:22:43 +0000 | 
| commit | 9bf092d71989771fafc8c7c775dba8bb3779b5bd (patch) | |
| tree | 7fc5a6c9902fef3fcf8bbd5e4329ae83078d2ed8 /llvm/lib/Target | |
| parent | 888dd5d198c9a4bda951dd613378a82b1757e2a3 (diff) | |
[AArch64][GlobalISel] Add isel support for vector G_ICMP and G_ASHR & G_SHL
The selection for G_ICMP is unfortunately not currently importable from SDAG
due to the use of custom SDNodes. To support it, this selection method uses an
opcode table, generated by a script and indexed by various instruction
properties. Ideally, in the future we will have GISel-native selection
patterns that we can write in tablegen to improve on this.
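As a rough illustration of the table-driven approach, here is a minimal
standalone C++ sketch. All names and opcode values below are hypothetical and
greatly reduced; the real table in the patch is indexed by log2 of the element
size in bytes, log2 of half the element count, and a predicate index, with 0
marking an unsupported combination.

```cpp
#include <cstdio>

// Hypothetical predicate indices; the patch uses nine (eq..sle).
enum Pred { EQ = 0, UGT, SGT, NumPreds };

// Dummy opcode numbers standing in for AArch64::CM* instruction opcodes.
static const unsigned OpcTable[2][NumPreds] = {
    {100, 101, 102}, // v2i32
    {200, 201, 202}, // v4i32
};

static unsigned lookupOpc(unsigned NumElts, Pred P) {
  unsigned NumEltsIdx = (NumElts == 2) ? 0 : 1; // only v2/v4 in this sketch
  return OpcTable[NumEltsIdx][P];               // 0 would mean "unsupported"
}

int main() {
  std::printf("v4i32 sgt -> opcode %u\n", lookupOpc(4, SGT)); // prints 202
}
```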
For selection of some types we also need support for G_ASHR and G_SHL, which
are generated as a result of legalization. This patch adds support for those
as well, generating the same code as SelectionDAG currently does.
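AArch64 has no vector "shift right by register" instruction, so the selector
emits a NEG of the shift amount followed by SSHL, which shifts right when the
amount is negative. Below is a minimal self-contained C++ model of one lane of
that behavior (illustrative only, not LLVM code):

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of one SSHL lane: a positive amount shifts left, a negative
// amount shifts right. G_ASHR x, s is therefore selected as NEG s followed
// by SSHL x, -s.
static int32_t sshlLane(int32_t Val, int8_t Amount) {
  // >> on a negative int32_t is an arithmetic shift on mainstream compilers.
  return Amount >= 0 ? Val << Amount : Val >> -Amount;
}

int main() {
  assert(sshlLane(-64, -3) == (-64 >> 3)); // NEG(3) = -3, then SSHL
  assert(sshlLane(80, 2) == (80 << 2));    // positive amount: left shift
}
```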
Differential Revision: https://reviews.llvm.org/D60436
llvm-svn: 358035
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 261 |
|---|---|---|

1 file changed, 259 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index a4e97e10275..8318018db79 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -67,6 +67,9 @@ private:
   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                            MachineRegisterInfo &MRI) const;
 
+  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
   // Helper to generate an equivalent of scalar_to_vector into a new register,
   // returned via 'Dst'.
   MachineInstr *emitScalarToVector(unsigned EltSize,
@@ -98,6 +101,7 @@ private:
                                 MachineRegisterInfo &MRI) const;
   bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                       MachineRegisterInfo &MRI) const;
+  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
   MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -824,6 +828,77 @@ bool AArch64InstructionSelector::selectCompareBranch(
   return true;
 }
 
+bool AArch64InstructionSelector::selectVectorSHL(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_SHL);
+  unsigned DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  unsigned Src1Reg = I.getOperand(1).getReg();
+  unsigned Src2Reg = I.getOperand(2).getReg();
+
+  if (!Ty.isVector())
+    return false;
+
+  unsigned Opc = 0;
+  const TargetRegisterClass *RC = nullptr;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::USHLv4i32;
+    RC = &AArch64::FPR128RegClass;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::USHLv2i32;
+    RC = &AArch64::FPR64RegClass;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
+  constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
+bool AArch64InstructionSelector::selectVectorASHR(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  assert(I.getOpcode() == TargetOpcode::G_ASHR);
+  unsigned DstReg = I.getOperand(0).getReg();
+  const LLT Ty = MRI.getType(DstReg);
+  unsigned Src1Reg = I.getOperand(1).getReg();
+  unsigned Src2Reg = I.getOperand(2).getReg();
+
+  if (!Ty.isVector())
+    return false;
+
+  // There is not a shift right register instruction, but the shift left
+  // register instruction takes a signed value, where negative numbers specify a
+  // right shift.
+
+  unsigned Opc = 0;
+  unsigned NegOpc = 0;
+  const TargetRegisterClass *RC = nullptr;
+  if (Ty == LLT::vector(4, 32)) {
+    Opc = AArch64::SSHLv4i32;
+    NegOpc = AArch64::NEGv4i32;
+    RC = &AArch64::FPR128RegClass;
+  } else if (Ty == LLT::vector(2, 32)) {
+    Opc = AArch64::SSHLv2i32;
+    NegOpc = AArch64::NEGv2i32;
+    RC = &AArch64::FPR64RegClass;
+  } else {
+    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
+    return false;
+  }
+
+  MachineIRBuilder MIB(I);
+  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
+  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
+  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
+  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
+  I.eraseFromParent();
+  return true;
+}
+
 bool AArch64InstructionSelector::selectVaStartAAPCS(
     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
   return false;
@@ -1318,10 +1393,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   case TargetOpcode::G_FMUL:
   case TargetOpcode::G_FDIV:
-  case TargetOpcode::G_OR:
+  case TargetOpcode::G_ASHR:
+    if (MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorASHR(I, MRI);
+    LLVM_FALLTHROUGH;
   case TargetOpcode::G_SHL:
+    if (Opcode == TargetOpcode::G_SHL &&
+        MRI.getType(I.getOperand(0).getReg()).isVector())
+      return selectVectorSHL(I, MRI);
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_OR:
   case TargetOpcode::G_LSHR:
-  case TargetOpcode::G_ASHR:
   case TargetOpcode::G_GEP: {
     // Reject the various things we don't support yet.
     if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1625,6 +1707,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     return true;
   }
   case TargetOpcode::G_ICMP: {
+    if (Ty.isVector())
+      return selectVectorICmp(I, MRI);
+
     if (Ty != LLT::scalar(32)) {
       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
                         << ", expected: " << LLT::scalar(32) << '\n');
@@ -1785,6 +1870,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
   return false;
 }
 
+bool AArch64InstructionSelector::selectVectorICmp(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  unsigned DstReg = I.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  unsigned SrcReg = I.getOperand(2).getReg();
+  unsigned Src2Reg = I.getOperand(3).getReg();
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+  unsigned NumElts = DstTy.getNumElements();
+
+  // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
+  // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
+  // Third index is cc opcode:
+  // 0 == eq
+  // 1 == ugt
+  // 2 == uge
+  // 3 == ult
+  // 4 == ule
+  // 5 == sgt
+  // 6 == sge
+  // 7 == slt
+  // 8 == sle
+  // ne is done by negating 'eq' result.
+
+  // This table below assumes that for some comparisons the operands will be
+  // commuted.
+  // ult op == commute + ugt op
+  // ule op == commute + uge op
+  // slt op == commute + sgt op
+  // sle op == commute + sge op
+  unsigned PredIdx = 0;
+  bool SwapOperands = false;
+  CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+  switch (Pred) {
+  case CmpInst::ICMP_NE:
+  case CmpInst::ICMP_EQ:
+    PredIdx = 0;
+    break;
+  case CmpInst::ICMP_UGT:
+    PredIdx = 1;
+    break;
+  case CmpInst::ICMP_UGE:
+    PredIdx = 2;
+    break;
+  case CmpInst::ICMP_ULT:
+    PredIdx = 3;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_ULE:
+    PredIdx = 4;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SGT:
+    PredIdx = 5;
+    break;
+  case CmpInst::ICMP_SGE:
+    PredIdx = 6;
+    break;
+  case CmpInst::ICMP_SLT:
+    PredIdx = 7;
+    SwapOperands = true;
+    break;
+  case CmpInst::ICMP_SLE:
+    PredIdx = 8;
+    SwapOperands = true;
+    break;
+  default:
+    llvm_unreachable("Unhandled icmp predicate");
+    return false;
+  }
+
+  // This table obviously should be tablegen'd when we have our GISel native
+  // tablegen selector.
+  static const unsigned OpcTable[4][4][9] = {
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+           AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+           AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+          {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+           AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+           AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
+      },
+      {
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+           AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+           AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+          {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+           AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+           AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+           AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+           AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+          {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+           AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+           AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+      {
+          {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+           AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+           AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */},
+          {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+           0 /* invalid */}
+      },
+  };
+  unsigned EltIdx = Log2_32(SrcEltSize / 8);
+  unsigned NumEltsIdx = Log2_32(NumElts / 2);
+  unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
+  if (!Opc) {
+    LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
+    return false;
+  }
+
+  const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+  const TargetRegisterClass *SrcRC =
+      getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+  if (!SrcRC) {
+    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+    return false;
+  }
+
+  unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
+  if (SrcTy.getSizeInBits() == 128)
+    NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
+
+  if (SwapOperands)
+    std::swap(SrcReg, Src2Reg);
+
+  MachineIRBuilder MIB(I);
+  auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
+  constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+
+  // Invert if we had a 'ne' cc.
+  if (NotOpc) {
+    Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
+    constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+  } else {
+    MIB.buildCopy(DstReg, Cmp.getReg(0));
+  }
+  RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
+  I.eraseFromParent();
+  return true;
+}
+
 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
     unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
     MachineIRBuilder &MIRBuilder) const {
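Two details of the selectVectorICmp path above are worth spelling out: the
table contains no dedicated "less-than" opcodes because a < b is selected as
the "greater-than" comparison with the operands swapped, and ne is selected as
eq followed by a vector NOT of the all-ones/all-zeros result mask. A tiny
lane-wise C++ model of both identities (illustrative only, not selector code):

```cpp
#include <cassert>
#include <cstdint>

// Lane models of the AArch64 compare instructions: each lane becomes
// all-ones when the comparison holds, all-zeros otherwise.
static uint32_t cmeqLane(uint32_t A, uint32_t B) { return A == B ? ~0u : 0u; }
static uint32_t cmhiLane(uint32_t A, uint32_t B) { return A > B ? ~0u : 0u; }

int main() {
  uint32_t A = 3, B = 7;
  // ult(a, b) == ugt(b, a): commute to reuse the unsigned-higher opcode.
  assert(cmhiLane(B, A) == ~0u); // 3 <u 7 holds
  // ne(a, b) == ~eq(a, b): invert the eq mask with a bitwise NOT.
  assert(~cmeqLane(A, B) == ~0u); // 3 != 7 holds
}
```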

