summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
author Amara Emerson <aemerson@apple.com> 2019-04-09 21:22:43 +0000
committer Amara Emerson <aemerson@apple.com> 2019-04-09 21:22:43 +0000
commit 9bf092d71989771fafc8c7c775dba8bb3779b5bd (patch)
tree 7fc5a6c9902fef3fcf8bbd5e4329ae83078d2ed8 /llvm/lib
parent 888dd5d198c9a4bda951dd613378a82b1757e2a3 (diff)
download bcm5719-llvm-9bf092d71989771fafc8c7c775dba8bb3779b5bd.tar.gz
download bcm5719-llvm-9bf092d71989771fafc8c7c775dba8bb3779b5bd.zip
[AArch64][GlobalISel] Add isel support for vector G_ICMP and G_ASHR & G_SHL
The selection for G_ICMP is unfortunately not currently importable from SDAG due to the use of custom SDNodes. To support this, this selection method has an opcode table which has been generated by a script, indexed by various instruction properties. Ideally in future we will have GISel-native selection patterns that we can write in tablegen to improve on this. For selection of some types we also need support for G_ASHR and G_SHL, which are generated as a result of legalization. This patch also adds support for them, generating the same code as SelectionDAG currently does. Differential Revision: https://reviews.llvm.org/D60436 llvm-svn: 358035
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 261
1 file changed, 259 insertions, 2 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index a4e97e10275..8318018db79 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -67,6 +67,9 @@ private:
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+
// Helper to generate an equivalent of scalar_to_vector into a new register,
// returned via 'Dst'.
MachineInstr *emitScalarToVector(unsigned EltSize,
@@ -98,6 +101,7 @@ private:
MachineRegisterInfo &MRI) const;
bool selectIntrinsicWithSideEffects(MachineInstr &I,
MachineRegisterInfo &MRI) const;
+ bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
unsigned emitConstantPoolEntry(Constant *CPVal, MachineFunction &MF) const;
MachineInstr *emitLoadFromConstantPool(Constant *CPVal,
@@ -824,6 +828,77 @@ bool AArch64InstructionSelector::selectCompareBranch(
return true;
}
+bool AArch64InstructionSelector::selectVectorSHL(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_SHL);
+ unsigned DstReg = I.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(DstReg);
+ unsigned Src1Reg = I.getOperand(1).getReg();
+ unsigned Src2Reg = I.getOperand(2).getReg();
+
+ if (!Ty.isVector())
+ return false;
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (Ty == LLT::vector(4, 32)) {
+ Opc = AArch64::USHLv4i32;
+ RC = &AArch64::FPR128RegClass;
+ } else if (Ty == LLT::vector(2, 32)) {
+ Opc = AArch64::USHLv2i32;
+ RC = &AArch64::FPR64RegClass;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
+ return false;
+ }
+
+ MachineIRBuilder MIB(I);
+ auto UShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Src2Reg});
+ constrainSelectedInstRegOperands(*UShl, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
+bool AArch64InstructionSelector::selectVectorASHR(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ assert(I.getOpcode() == TargetOpcode::G_ASHR);
+ unsigned DstReg = I.getOperand(0).getReg();
+ const LLT Ty = MRI.getType(DstReg);
+ unsigned Src1Reg = I.getOperand(1).getReg();
+ unsigned Src2Reg = I.getOperand(2).getReg();
+
+ if (!Ty.isVector())
+ return false;
+
+ // There is not a shift right register instruction, but the shift left
+ // register instruction takes a signed value, where negative numbers specify a
+ // right shift.
+
+ unsigned Opc = 0;
+ unsigned NegOpc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ if (Ty == LLT::vector(4, 32)) {
+ Opc = AArch64::SSHLv4i32;
+ NegOpc = AArch64::NEGv4i32;
+ RC = &AArch64::FPR128RegClass;
+ } else if (Ty == LLT::vector(2, 32)) {
+ Opc = AArch64::SSHLv2i32;
+ NegOpc = AArch64::NEGv2i32;
+ RC = &AArch64::FPR64RegClass;
+ } else {
+ LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
+ return false;
+ }
+
+ MachineIRBuilder MIB(I);
+ auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
+ constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
+ auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
+ constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
+ I.eraseFromParent();
+ return true;
+}
+
bool AArch64InstructionSelector::selectVaStartAAPCS(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
return false;
@@ -1318,10 +1393,17 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FDIV:
- case TargetOpcode::G_OR:
+ case TargetOpcode::G_ASHR:
+ if (MRI.getType(I.getOperand(0).getReg()).isVector())
+ return selectVectorASHR(I, MRI);
+ LLVM_FALLTHROUGH;
case TargetOpcode::G_SHL:
+ if (Opcode == TargetOpcode::G_SHL &&
+ MRI.getType(I.getOperand(0).getReg()).isVector())
+ return selectVectorSHL(I, MRI);
+ LLVM_FALLTHROUGH;
+ case TargetOpcode::G_OR:
case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR:
case TargetOpcode::G_GEP: {
// Reject the various things we don't support yet.
if (unsupportedBinOp(I, RBI, MRI, TRI))
@@ -1625,6 +1707,9 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return true;
}
case TargetOpcode::G_ICMP: {
+ if (Ty.isVector())
+ return selectVectorICmp(I, MRI);
+
if (Ty != LLT::scalar(32)) {
LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
<< ", expected: " << LLT::scalar(32) << '\n');
@@ -1785,6 +1870,178 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
return false;
}
+bool AArch64InstructionSelector::selectVectorICmp(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ unsigned DstReg = I.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ unsigned SrcReg = I.getOperand(2).getReg();
+ unsigned Src2Reg = I.getOperand(3).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
+ unsigned NumElts = DstTy.getNumElements();
+
+ // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
+ // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
+ // Third index is cc opcode:
+ // 0 == eq
+ // 1 == ugt
+ // 2 == uge
+ // 3 == ult
+ // 4 == ule
+ // 5 == sgt
+ // 6 == sge
+ // 7 == slt
+ // 8 == sle
+ // ne is done by negating 'eq' result.
+
+ // This table below assumes that for some comparisons the operands will be
+ // commuted.
+ // ult op == commute + ugt op
+ // ule op == commute + uge op
+ // slt op == commute + sgt op
+ // sle op == commute + sge op
+ unsigned PredIdx = 0;
+ bool SwapOperands = false;
+ CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
+ switch (Pred) {
+ case CmpInst::ICMP_NE:
+ case CmpInst::ICMP_EQ:
+ PredIdx = 0;
+ break;
+ case CmpInst::ICMP_UGT:
+ PredIdx = 1;
+ break;
+ case CmpInst::ICMP_UGE:
+ PredIdx = 2;
+ break;
+ case CmpInst::ICMP_ULT:
+ PredIdx = 3;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_ULE:
+ PredIdx = 4;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_SGT:
+ PredIdx = 5;
+ break;
+ case CmpInst::ICMP_SGE:
+ PredIdx = 6;
+ break;
+ case CmpInst::ICMP_SLT:
+ PredIdx = 7;
+ SwapOperands = true;
+ break;
+ case CmpInst::ICMP_SLE:
+ PredIdx = 8;
+ SwapOperands = true;
+ break;
+ default:
+ llvm_unreachable("Unhandled icmp predicate");
+ return false;
+ }
+
+ // This table obviously should be tablegen'd when we have our GISel native
+ // tablegen selector.
+
+ static const unsigned OpcTable[4][4][9] = {
+ {
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
+ AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
+ AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
+ {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
+ AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
+ AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
+ },
+ {
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
+ AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
+ AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
+ {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
+ AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
+ AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ {
+ {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
+ AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
+ AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
+ {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
+ AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
+ AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ {
+ {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
+ AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
+ AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */},
+ {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
+ 0 /* invalid */}
+ },
+ };
+ unsigned EltIdx = Log2_32(SrcEltSize / 8);
+ unsigned NumEltsIdx = Log2_32(NumElts / 2);
+ unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
+ if (!Opc) {
+ LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
+ return false;
+ }
+
+ const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const TargetRegisterClass *SrcRC =
+ getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
+ if (!SrcRC) {
+ LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
+ return false;
+ }
+
+ unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
+ if (SrcTy.getSizeInBits() == 128)
+ NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
+
+ if (SwapOperands)
+ std::swap(SrcReg, Src2Reg);
+
+ MachineIRBuilder MIB(I);
+ auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
+ constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+
+ // Invert if we had a 'ne' cc.
+ if (NotOpc) {
+ Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
+ constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
+ } else {
+ MIB.buildCopy(DstReg, Cmp.getReg(0));
+ }
+ RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
+ I.eraseFromParent();
+ return true;
+}
+
MachineInstr *AArch64InstructionSelector::emitScalarToVector(
unsigned EltSize, const TargetRegisterClass *DstRC, unsigned Scalar,
MachineIRBuilder &MIRBuilder) const {
OpenPOWER on IntegriCloud