summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
diff options
context:
space:
mode:
authorJonas Paulsson <paulsson@linux.vnet.ibm.com>2018-07-31 13:00:42 +0000
committerJonas Paulsson <paulsson@linux.vnet.ibm.com>2018-07-31 13:00:42 +0000
commit2f12e45d5ae1adbc2b45f11191d0db6a0f8352b9 (patch)
treecad54c3d7db1d0950a4e99626339eba55d4281fa /llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
parent9a801cb598cf42dd3dccc6bd370b78692e239606 (diff)
downloadbcm5719-llvm-2f12e45d5ae1adbc2b45f11191d0db6a0f8352b9.tar.gz
bcm5719-llvm-2f12e45d5ae1adbc2b45f11191d0db6a0f8352b9.zip
[SystemZ] Improve decoding in case of instructions with four register operands.
Since z13, the max group size will be 2 if any μop has more than 3 register sources. This has been ignored sofar in the SystemZHazardRecognizer, but is now handled by recognizing those instructions and adjusting the tracking of decoding and the cost heuristic for grouping. Review: Ulrich Weigand https://reviews.llvm.org/D49847 llvm-svn: 338368
Diffstat (limited to 'llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp')
-rw-r--r--llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp39
1 files changed, 37 insertions, 2 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
index d01dd9eaaaf..c7dd3581eae 100644
--- a/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -81,6 +81,7 @@ getHazardType(SUnit *m, int Stalls) {
void SystemZHazardRecognizer::Reset() {
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
clearProcResCounters();
GrpCount = 0;
LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
if (SC->BeginGroup)
return (CurrGroupSize == 0);
+ // An instruction with 4 register operands will not fit in last slot.
+ assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) ||
+ "Current decoder group is already full!");
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return false;
+
// Since a full group is handled immediately in EmitInstruction(),
// SU should fit into current group. NumSlots should be 1 or 0,
// since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@ SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
return true;
}
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+ const MCInstrDesc &MID = MI->getDesc();
+ unsigned Count = 0;
+ for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+ const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+ if (RC == nullptr)
+ continue;
+ if (OpIdx >= MID.getNumDefs() &&
+ MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+ continue;
+ Count++;
+ }
+ return Count >= 4;
+}
+
void SystemZHazardRecognizer::nextGroup() {
if (CurrGroupSize == 0)
return;
@@ -119,6 +143,7 @@ void SystemZHazardRecognizer::nextGroup() {
// Reset counter for next group.
CurrGroupSize = 0;
+ CurrGroupHas4RegOps = false;
// Decrease counters for execution units by one.
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -172,6 +197,8 @@ void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
OS << "/EndsGroup";
if (SU->isUnbuffered)
OS << "/Unbuffered";
+ if (has4RegOps(SU->getInstr()))
+ OS << "/4RegOps";
}
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@ void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
dbgs() << "{ " << CurGroupDbg << " }";
dbgs() << " (" << CurrGroupSize << " decoder slot"
<< (CurrGroupSize > 1 ? "s":"")
+ << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
<< ")\n";
}
}
@@ -294,11 +322,14 @@ EmitInstruction(SUnit *SU) {
// Insert SU into current group by increasing number of slots used
// in current group.
CurrGroupSize += getNumDecoderSlots(SU);
- assert (CurrGroupSize <= 3);
+ CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
+ unsigned GroupLim =
+ ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+ assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
// Check if current group is now full/ended. If so, move on to next
// group to be ready to evaluate more candidates.
- if (CurrGroupSize == 3 || SC->EndGroup)
+ if (CurrGroupSize == GroupLim || SC->EndGroup)
nextGroup();
}
@@ -325,6 +356,10 @@ int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
return -1;
}
+ // An instruction with 4 register operands will not fit in last slot.
+ if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+ return 1;
+
// Most instructions can be placed in any decoder slot.
return 0;
}
OpenPOWER on IntegriCloud