summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-07 19:37:44 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-02-07 19:37:44 +0000
commitfbec8fe93bb036113a2d7224a501b8e903ce2896 (patch)
tree01bb2b0899fa8e1fd747fcb133a2d679e4e5b468 /llvm/lib/CodeGen
parentd914189a2ec513dd49551a20757ff623a6d50476 (diff)
downloadbcm5719-llvm-fbec8fe93bb036113a2d7224a501b8e903ce2896.tar.gz
bcm5719-llvm-fbec8fe93bb036113a2d7224a501b8e903ce2896.zip
GlobalISel: Implement narrowScalar for shift main type
This is pretty much directly ported from SelectionDAG. Doesn't include the shift by non-constant but known bits version, since there isn't a globalisel version of computeKnownBits yet. This shows a disadvantage of targets not specifically which type should be used for the shift amount. If type 0 is legalized before type 1, the operations on the shift amount type use the wider type (which are also less likely to legalize). This can be avoided by targets specifying legalization actions on type 1 earlier than for type 0. llvm-svn: 353455
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp225
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp22
2 files changed, 235 insertions, 12 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a6ca85566fa..3dd5c32f5d0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -811,14 +811,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
}
case TargetOpcode::G_SHL:
case TargetOpcode::G_LSHR:
- case TargetOpcode::G_ASHR: {
- if (TypeIdx != 1)
- return UnableToLegalize; // TODO
- Observer.changingInstr(MI);
- narrowScalarSrc(MI, NarrowTy, 2);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_ASHR:
+ return narrowScalarShift(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ:
@@ -2195,6 +2189,221 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+ const LLT HalfTy, const LLT AmtTy) {
+
+ unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
+ unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ if (Amt.isNullValue()) {
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ LLT NVT = HalfTy;
+ unsigned NVTBits = HalfTy.getSizeInBits();
+ unsigned VTBits = 2 * NVTBits;
+
+ SrcOp Lo(0), Hi(0);
+ if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = MIRBuilder.buildShl(NVT, InL,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ } else if (Amt == NVTBits) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = InL;
+ } else {
+ Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
+ Hi = MIRBuilder.buildOr(
+ NVT,
+ MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt)),
+ MIRBuilder.buildLShr(
+ NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits)));
+ }
+ } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildLShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
+ }
+ } else {
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = MIRBuilder.buildAShr(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
+ }
+ }
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+// TODO: Optimize if constant shift amount.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
+ LLT RequestedTy) {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, RequestedTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ unsigned Amt = MI.getOperand(2).getReg();
+ LLT ShiftAmtTy = MRI.getType(Amt);
+ const unsigned DstEltSize = DstTy.getScalarSizeInBits();
+ if (DstEltSize % 2 != 0)
+ return UnableToLegalize;
+
+ // Ignore the input type. We can only go to exactly half the size of the
+ // input. If that isn't small enough, the resulting pieces will be further
+ // legalized.
+ const unsigned NewBitSize = DstEltSize / 2;
+ const LLT HalfTy = LLT::scalar(NewBitSize);
+ const LLT CondTy = LLT::scalar(1);
+
+ if (const MachineInstr *KShiftAmt =
+ getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
+ return narrowScalarShiftByConstant(
+ MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ }
+
+ // TODO: Expand with known bits.
+
+ // Handle the fully general expansion by an unknown amount.
+ auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+ unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
+ unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+ auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+
+ auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+ auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+ auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+ unsigned ResultRegs[2];
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL: {
+ // Short: ShAmt < NewBitSize
+ auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
+ auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+ auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+ auto Hi = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_ASHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+
+ // Sign of Hi part.
+ auto HiL = MIRBuilder.buildAShr(
+ HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
+
+ auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("not a shift");
+ }
+
+ MIRBuilder.buildMerge(DstReg, ResultRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src0 = MI.getOperand(1).getReg();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 8fa818afd70..c01eee1fe88 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -185,6 +185,12 @@ void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
assert((Res == Op0 && Res == Op1) && "type mismatch");
}
+void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
unsigned Op1) {
assert(getMRI()->getType(Res).isPointer() &&
@@ -852,11 +858,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
- case TargetOpcode::G_SHL:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_UDIV:
@@ -870,6 +873,17 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()),
SrcOps[1].getLLTTy(*getMRI()));
break;
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -879,7 +893,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()), true);
break;
case TargetOpcode::G_TRUNC:
- case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPTRUNC: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 1 && "Invalid Srcs");
validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
OpenPOWER on IntegriCloud