From 2f12e45d5ae1adbc2b45f11191d0db6a0f8352b9 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 31 Jul 2018 13:00:42 +0000 Subject: [SystemZ] Improve decoding in case of instructions with four register operands. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since z13, the max group size will be 2 if any μop has more than 3 register sources. This has been ignored sofar in the SystemZHazardRecognizer, but is now handled by recognizing those instructions and adjusting the tracking of decoding and the cost heuristic for grouping. Review: Ulrich Weigand https://reviews.llvm.org/D49847 llvm-svn: 338368 --- .../lib/Target/SystemZ/SystemZHazardRecognizer.cpp | 39 ++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp') diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp index d01dd9eaaaf..c7dd3581eae 100644 --- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp +++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp @@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) { void SystemZHazardRecognizer::Reset() { CurrGroupSize = 0; + CurrGroupHas4RegOps = false; clearProcResCounters(); GrpCount = 0; LastFPdOpCycleIdx = UINT_MAX; @@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { if (SC->BeginGroup) return (CurrGroupSize == 0); + // An instruction with 4 register operands will not fit in last slot. + assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) || + "Current decoder group is already full!"); + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return false; + // Since a full group is handled immediately in EmitInstruction(), // SU should fit into current group. NumSlots should be 1 or 0, // since it is not a cracked or expanded instruction. @@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const { return true; } +bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + const MCInstrDesc &MID = MI->getDesc(); + unsigned Count = 0; + for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) { + const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF); + if (RC == nullptr) + continue; + if (OpIdx >= MID.getNumDefs() && + MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) + continue; + Count++; + } + return Count >= 4; +} + void SystemZHazardRecognizer::nextGroup() { if (CurrGroupSize == 0) return; @@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() { // Reset counter for next group. CurrGroupSize = 0; + CurrGroupHas4RegOps = false; // Decrease counters for execution units by one. for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i) @@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const { OS << "/EndsGroup"; if (SU->isUnbuffered) OS << "/Unbuffered"; + if (has4RegOps(SU->getInstr())) + OS << "/4RegOps"; } void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { @@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const { dbgs() << "{ " << CurGroupDbg << " }"; dbgs() << " (" << CurrGroupSize << " decoder slot" << (CurrGroupSize > 1 ? "s":"") + << (CurrGroupHas4RegOps ? ", 4RegOps" : "") << ")\n"; } } @@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) { // Insert SU into current group by increasing number of slots used // in current group. CurrGroupSize += getNumDecoderSlots(SU); - assert (CurrGroupSize <= 3); + CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); + unsigned GroupLim = + ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3); + assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!"); // Check if current group is now full/ended. If so, move on to next // group to be ready to evaluate more candidates. - if (CurrGroupSize == 3 || SC->EndGroup) + if (CurrGroupSize == GroupLim || SC->EndGroup) nextGroup(); } @@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const { return -1; } + // An instruction with 4 register operands will not fit in last slot. + if (CurrGroupSize == 2 && has4RegOps(SU->getInstr())) + return 1; + // Most instructions can be placed in any decoder slot. return 0; } -- cgit v1.2.3