| author | Jessica Paquette <jpaquette@apple.com> | 2019-06-17 18:40:06 +0000 |
|---|---|---|
| committer | Jessica Paquette <jpaquette@apple.com> | 2019-06-17 18:40:06 +0000 |
| commit | 49537bbf74a68c44d32e51f15080500ce994f1fc | |
| tree | 68284a6e347e728650d12da2242b46ef94159634 | |
| parent | f3f968adcd5988ddb818a37c80cc7d7a131ccc18 | |
[GlobalISel][AArch64] Fold G_SUB into G_ICMP when it's safe to do so
This ports the corresponding behaviour from AArch64ISelLowering over to GISel. See
emitComparison for reference.
When we have something like this:
```
lhs = G_SUB 0, y
...
G_ICMP lhs, rhs
```
We can fold away the G_SUB and produce a cmn instead, provided that doing so produces
the same value in NZCV as the original compare.
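The fold is only taken for equality predicates (the eq/ne check in IsCMN in the patch below). As a quick illustration of the NZCV reasoning, here is a small standalone C++ sketch; it is my own example, not part of this patch:
```
// Z-flag equivalence: "cmn x, y" (an ADDS against the zero register) sets Z
// exactly when "cmp x, (0 - y)" does, so eq/ne are safe to rewrite.
// Ordered predicates are not: the G_SUB wraps, so the IR compares against the
// *wrapped* negation of y, while the flags of an ADDS reflect the true sum.
#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  // x - (0 - y) == 0 iff x + y == 0, both taken mod 2^32.
  for (uint32_t X : {0u, 1u, 0x80000000u, 0xffffffffu})
    for (uint32_t Y : {0u, 1u, 0x80000000u, 0xffffffffu})
      assert(((X - (0u - Y)) == 0u) == ((X + Y) == 0u));

  // Signed less-than changes meaning when negating y overflows: the IR
  // compares against the wrapped negation (still INT32_MIN here), whereas
  // the ADDS condition "lt" holds iff the true sum is negative.
  int32_t A = 0;
  int32_t B = std::numeric_limits<int32_t>::min();
  int32_t WrappedNeg = static_cast<int32_t>(0u - static_cast<uint32_t>(B));
  bool IRResult = A < WrappedNeg;                                         // false
  bool CmnResult = static_cast<int64_t>(A) + static_cast<int64_t>(B) < 0; // true
  assert(IRResult != CmnResult);
  return 0;
}
```
In other words, the Z flag of the cmn always agrees with the original compare, but the ordered conditions can disagree exactly when negating y overflows, which is the unsafe case mentioned above.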
Add a test showing that the transformation works, and also showing that we
don't perform the transformation when it's unsafe.
Also factor out the CSet emission into emitCSetForICMP.
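The factored-out helper emits the CSet as a CSINC of WZR against WZR under the inverted condition, as the comment in emitCSetForICMP notes. A tiny standalone sketch of why that computes a cset (again my illustration, not part of the patch):
```
// CSINC dst, n, m, cond  ==>  dst = cond ? n : m + 1.
// With both sources wired to the zero register and the condition inverted,
// the result is 1 exactly when the original predicate holds, i.e. a cset.
#include <cassert>
#include <cstdint>

static uint32_t CSINC(uint32_t N, uint32_t M, bool Cond) {
  return Cond ? N : M + 1;
}

int main() {
  for (bool Pred : {false, true})
    assert(CSINC(/*wzr=*/0, /*wzr=*/0, /*cond=*/!Pred) ==
           static_cast<uint32_t>(Pred));
  return 0;
}
```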
Differential Revision: https://reviews.llvm.org/D63163
llvm-svn: 363596
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp | 160 |
|---|---|---|
1 file changed, 144 insertions, 16 deletions
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index cd3ca7cf37d..d059017da2f 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -124,6 +124,10 @@ private:
   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                      MachineRegisterInfo &MRI) const;
 
+  /// Emit a CSet for a compare.
+  MachineInstr *emitCSetForICMP(unsigned DefReg, unsigned Pred,
+                                MachineIRBuilder &MIRBuilder) const;
+
   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
 
   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
@@ -173,6 +177,7 @@ private:
   bool tryOptVectorShuffle(MachineInstr &I) const;
   bool tryOptVectorDup(MachineInstr &MI) const;
   bool tryOptSelect(MachineInstr &MI) const;
+  bool tryOptCMN(MachineInstr &MI) const;
 
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
@@ -1847,6 +1852,11 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     unsigned CmpOpc = 0;
     unsigned ZReg = 0;
 
+    // Check if this compare can be represented as a cmn, and perform any
+    // necessary transformations to do so.
+    if (tryOptCMN(I))
+      return true;
+
     LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
     if (CmpTy == LLT::scalar(32)) {
       CmpOpc = AArch64::SUBSWrr;
@@ -1863,13 +1873,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
     if (ImmFns)
       CmpOpc = CmpOpc == AArch64::SUBSWrr ? AArch64::SUBSWri : AArch64::SUBSXri;
 
-    // CSINC increments the result by one when the condition code is false.
-    // Therefore, we have to invert the predicate to get an increment by 1 when
-    // the predicate is true.
-    const AArch64CC::CondCode invCC =
-        changeICMPPredToAArch64CC(CmpInst::getInversePredicate(
-            (CmpInst::Predicate)I.getOperand(1).getPredicate()));
-
     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
                      .addDef(ZReg)
                      .addUse(I.getOperand(2).getReg());
@@ -1882,16 +1885,10 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
       CmpMI.addUse(I.getOperand(3).getReg());
     }
 
-    MachineInstr &CSetMI =
-        *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
-             .addDef(I.getOperand(0).getReg())
-             .addUse(AArch64::WZR)
-             .addUse(AArch64::WZR)
-             .addImm(invCC);
-
+    MachineIRBuilder MIRBuilder(I);
+    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
+                    MIRBuilder);
     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
-    constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
-
     I.eraseFromParent();
     return true;
   }
@@ -2854,6 +2851,20 @@ MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
   return &I;
 }
 
+MachineInstr *
+AArch64InstructionSelector::emitCSetForICMP(unsigned DefReg, unsigned Pred,
+                                            MachineIRBuilder &MIRBuilder) const {
+  // CSINC increments the result when the predicate is false. Invert it.
+  const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
+      CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
+  auto I =
+      MIRBuilder
+          .buildInstr(AArch64::CSINCWr, {DefReg}, {AArch64::WZR, AArch64::WZR})
+          .addImm(InvCC);
+  constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
+  return &*I;
+}
+
 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   MachineIRBuilder MIB(I);
   MachineRegisterInfo &MRI = *MIB.getMRI();
@@ -2938,6 +2949,123 @@ bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   return true;
 }
 
+bool AArch64InstructionSelector::tryOptCMN(MachineInstr &I) const {
+  assert(I.getOpcode() == TargetOpcode::G_ICMP && "Expected G_ICMP");
+  MachineIRBuilder MIRBuilder(I);
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  // We want to find this sort of thing:
+  // x = G_SUB 0, y
+  // G_ICMP z, x
+  //
+  // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
+  // e.g:
+  //
+  // cmn z, y
+
+  // Helper lambda to find the def.
+  auto FindDef = [&](unsigned VReg) {
+    MachineInstr *Def = MRI.getVRegDef(VReg);
+    while (Def) {
+      if (Def->getOpcode() != TargetOpcode::COPY)
+        break;
+      // Copies can be from physical registers. If we hit this, we're done.
+      if (TargetRegisterInfo::isPhysicalRegister(Def->getOperand(1).getReg()))
+        break;
+      Def = MRI.getVRegDef(Def->getOperand(1).getReg());
+    }
+    return Def;
+  };
+
+  // Helper lambda to detect the subtract followed by the compare.
+  // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
+  auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
+    if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
+      return false;
+
+    // Need to make sure NZCV is the same at the end of the transformation.
+    if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
+      return false;
+
+    // We want to match against SUBs.
+    if (DefMI->getOpcode() != TargetOpcode::G_SUB)
+      return false;
+
+    // Make sure that we're getting
+    // x = G_SUB 0, y
+    auto ValAndVReg =
+        getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
+    if (!ValAndVReg || ValAndVReg->Value != 0)
+      return false;
+
+    // This can safely be represented as a CMN.
+    return true;
+  };
+
+  // Check if the RHS or LHS of the G_ICMP is defined by a SUB
+  MachineInstr *LHSDef = FindDef(I.getOperand(2).getReg());
+  MachineInstr *RHSDef = FindDef(I.getOperand(3).getReg());
+  const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+      (CmpInst::Predicate)I.getOperand(1).getPredicate());
+  bool DidFold = false;
+  if (IsCMN(LHSDef, CC)) {
+    // We're doing this:
+    //
+    // Given:
+    //
+    // x = G_SUB 0, y
+    // G_ICMP x, z
+    //
+    // Update the G_ICMP:
+    //
+    // G_ICMP y, z
+    I.getOperand(2).setReg(LHSDef->getOperand(2).getReg());
+    DidFold = true;
+  } else if (IsCMN(RHSDef, CC)) {
+    // Same idea here, but with the RHS of the compare instead:
+    //
+    // Given:
+    //
+    // x = G_SUB 0, y
+    // G_ICMP z, x
+    //
+    // Update the G_ICMP:
+    //
+    // G_ICMP z, y
+    I.getOperand(3).setReg(RHSDef->getOperand(2).getReg());
+    DidFold = true;
+  }
+
+  if (DidFold) {
+    // We can fold. Emit a CMN.
+    static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
+                                         {AArch64::ADDSWrr, AArch64::ADDSWri}};
+    bool Is32Bit =
+        (MRI.getType(I.getOperand(2).getReg()).getSizeInBits() == 32);
+    auto ImmFns = selectArithImmed(I.getOperand(3));
+    unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
+    unsigned ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
+
+    auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {I.getOperand(2).getReg()});
+
+    // If we matched a valid constant immediate, add those operands.
+    if (ImmFns) {
+      for (auto &RenderFn : *ImmFns)
+        RenderFn(CmpMI);
+    } else {
+      CmpMI.addUse(I.getOperand(3).getReg());
+    }
+
+    constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
+
+    // Add a CSet after the CMN.
+    emitCSetForICMP(I.getOperand(0).getReg(), I.getOperand(1).getPredicate(),
+                    MIRBuilder);
+    I.eraseFromParent();
+  }
+
+  return DidFold;
+}
+
 bool AArch64InstructionSelector::tryOptVectorDup(MachineInstr &I) const {
   // Try to match a vector splat operation into a dup instruction.
   // We're looking for this pattern:
```

