summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64MacroFusion.cpp168
1 files changed, 89 insertions, 79 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index 1cac19b8139..a6926a6700e 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -29,21 +29,25 @@ static cl::opt<bool> EnableMacroFusion("aarch64-misched-fusion", cl::Hidden,
namespace {
-/// \brief Verify that the instruction pair, First and Second,
-/// should be scheduled back to back. Given an anchor instruction, if the other
-/// instruction is unspecified, then verify that the anchor instruction may be
-/// part of a pair at all.
-static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
- const AArch64Subtarget &ST,
- const MachineInstr *First,
- const MachineInstr *Second) {
- assert((First || Second) && "At least one instr must be specified");
+/// \brief Verify that the instr pair, FirstMI and SecondMI, should be fused
+/// together. Given an anchor instr, when the other instr is unspecified, then
+/// check if the anchor instr may be part of a fused pair at all.
+static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
+ const TargetSubtargetInfo &TSI,
+ const MachineInstr *FirstMI,
+ const MachineInstr *SecondMI) {
+ assert((FirstMI || SecondMI) && "At least one instr must be specified");
+
+ const AArch64InstrInfo &II = static_cast<const AArch64InstrInfo&>(TII);
+ const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
+
+ // Assume wildcards for unspecified instrs.
unsigned FirstOpcode =
- First ? First->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
unsigned SecondOpcode =
- Second ? Second->getOpcode()
- : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
+ SecondMI ? SecondMI->getOpcode()
+ : static_cast<unsigned>(AArch64::INSTRUCTION_LIST_END);
if (ST.hasArithmeticBccFusion())
// Fuse CMN, CMP, TST followed by Bcc.
@@ -75,7 +79,7 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
case AArch64::BICSWrs:
case AArch64::BICSXrs:
// Shift value can be 0 making these behave like the "rr" variant...
- return !TII.hasShiftedReg(*First);
+ return !II.hasShiftedReg(*FirstMI);
case AArch64::INSTRUCTION_LIST_END:
return true;
}
@@ -117,7 +121,7 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
case AArch64::BICWrs:
case AArch64::BICXrs:
// Shift value can be 0 making these behave like the "rr" variant...
- return !TII.hasShiftedReg(*First);
+ return !II.hasShiftedReg(*FirstMI);
case AArch64::INSTRUCTION_LIST_END:
return true;
}
@@ -145,92 +149,98 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
// 32 bit immediate.
case AArch64::MOVZWi:
return (SecondOpcode == AArch64::MOVKWi &&
- Second->getOperand(3).getImm() == 16) ||
+ SecondMI->getOperand(3).getImm() == 16) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// Lower half of 64 bit immediate.
case AArch64::MOVZXi:
return (SecondOpcode == AArch64::MOVKXi &&
- Second->getOperand(3).getImm() == 16) ||
+ SecondMI->getOperand(3).getImm() == 16) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// Upper half of 64 bit immediate.
case AArch64::MOVKXi:
- return First->getOperand(3).getImm() == 32 &&
+ return FirstMI->getOperand(3).getImm() == 32 &&
((SecondOpcode == AArch64::MOVKXi &&
- Second->getOperand(3).getImm() == 48) ||
+ SecondMI->getOperand(3).getImm() == 48) ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END);
}
return false;
}
-/// \brief Implement the fusion of instruction pairs in the scheduling
-/// DAG, anchored at the instruction in ASU. Preds
-/// indicates if its dependencies in \param APreds are predecessors instead of
-/// successors.
-static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit *ASU,
- SmallVectorImpl<SDep> &APreds, bool Preds) {
- const AArch64InstrInfo *TII = static_cast<const AArch64InstrInfo *>(DAG->TII);
- const AArch64Subtarget &ST = DAG->MF.getSubtarget<AArch64Subtarget>();
-
- const MachineInstr *AMI = ASU->getInstr();
- if (!AMI || AMI->isPseudo() || AMI->isTransient() ||
- (Preds && !shouldScheduleAdjacent(*TII, ST, nullptr, AMI)) ||
- (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, nullptr)))
+/// \brief Implement the fusion of instr pairs in the scheduling DAG,
+/// anchored at the instr in AnchorSU..
+static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
+ const MachineInstr *AnchorMI = AnchorSU.getInstr();
+ if (!AnchorMI || AnchorMI->isPseudo() || AnchorMI->isTransient())
return false;
- for (SDep &BDep : APreds) {
- if (BDep.isWeak())
+ // If the anchor instr is the ExitSU, then consider its predecessors;
+ // otherwise, its successors.
+ bool Preds = (&AnchorSU == &DAG->ExitSU);
+ SmallVectorImpl<SDep> &AnchorDeps = Preds ? AnchorSU.Preds : AnchorSU.Succs;
+
+ const MachineInstr *FirstMI = Preds ? nullptr : AnchorMI;
+ const MachineInstr *SecondMI = Preds ? AnchorMI : nullptr;
+
+ // Check if the anchor instr may be fused.
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ return false;
+
+ // Explorer for fusion candidates among the dependencies of the anchor instr.
+ for (SDep &Dep : AnchorDeps) {
+ // Ignore dependencies that don't enforce ordering.
+ if (Dep.isWeak())
continue;
- SUnit *BSU = BDep.getSUnit();
- const MachineInstr *BMI = BSU->getInstr();
- if (!BMI || BMI->isPseudo() || BMI->isTransient() ||
- (Preds && !shouldScheduleAdjacent(*TII, ST, BMI, AMI)) ||
- (!Preds && !shouldScheduleAdjacent(*TII, ST, AMI, BMI)))
+ SUnit &DepSU = *Dep.getSUnit();
+ // Ignore the ExitSU if the dependents are successors.
+ if (!Preds && &DepSU == &DAG->ExitSU)
continue;
- // Create a single weak edge between the adjacent instrs. The only
- // effect is to cause bottom-up scheduling to heavily prioritize the
- // clustered instrs.
- if (Preds)
- DAG->addEdge(ASU, SDep(BSU, SDep::Cluster));
- else
- DAG->addEdge(BSU, SDep(ASU, SDep::Cluster));
-
- // Adjust the latency between the 1st instr and its predecessors/successors.
- for (SDep &Dep : APreds)
- if (Dep.getSUnit() == BSU)
- Dep.setLatency(0);
-
- // Adjust the latency between the 2nd instr and its successors/predecessors.
- auto &BSuccs = Preds ? BSU->Succs : BSU->Preds;
- for (SDep &Dep : BSuccs)
- if (Dep.getSUnit() == ASU)
- Dep.setLatency(0);
+ const MachineInstr *DepMI = DepSU.getInstr();
+ if (!DepMI || DepMI->isPseudo() || DepMI->isTransient())
+ continue;
- ++NumFused;
- DEBUG({ SUnit *LSU = Preds ? BSU : ASU;
- SUnit *RSU = Preds ? ASU : BSU;
- const MachineInstr *LMI = Preds ? BMI : AMI;
- const MachineInstr *RMI = Preds ? AMI : BMI;
-
- dbgs() << DAG->MF.getName() << "(): Macro fuse ";
- LSU->print(dbgs(), DAG);
- dbgs() << " - ";
- RSU->print(dbgs(), DAG);
- dbgs() << " / " <<
- TII->getName(LMI->getOpcode()) << " - " <<
- TII->getName(RMI->getOpcode()) << '\n';
- });
+ FirstMI = Preds ? DepMI : AnchorMI;
+ SecondMI = Preds ? AnchorMI : DepMI;
+ if (!shouldScheduleAdjacent(*DAG->TII, DAG->MF.getSubtarget(),
+ FirstMI, SecondMI))
+ continue;
+ // Create a single weak edge between the adjacent instrs. The only effect is
+ // to cause bottom-up scheduling to heavily prioritize the clustered instrs.
+ SUnit &FirstSU = Preds ? DepSU : AnchorSU;
+ SUnit &SecondSU = Preds ? AnchorSU : DepSU;
+ DAG->addEdge(&SecondSU, SDep(&FirstSU, SDep::Cluster));
+
+ // Adjust the latency between the anchor instr and its
+ // predecessors/successors.
+ for (SDep &IDep : AnchorDeps)
+ if (IDep.getSUnit() == &DepSU)
+ IDep.setLatency(0);
+
+ // Adjust the latency between the dependent instr and its
+ // successors/predecessors.
+ for (SDep &IDep : Preds ? DepSU.Succs : DepSU.Preds)
+ if (IDep.getSUnit() == &AnchorSU)
+ IDep.setLatency(0);
+
+ DEBUG(dbgs() << DAG->MF.getName() << "(): Macro fuse ";
+ FirstSU.print(dbgs(), DAG); dbgs() << " - ";
+ SecondSU.print(dbgs(), DAG); dbgs() << " / ";
+ dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
+ DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+
+ ++NumFused;
return true;
}
return false;
}
-/// \brief Post-process the DAG to create cluster edges between instructions
-/// that may be fused by the processor into a single operation.
+/// \brief Post-process the DAG to create cluster edges between instrs that may
+/// be fused by the processor into a single operation.
class AArch64MacroFusion : public ScheduleDAGMutation {
public:
AArch64MacroFusion() {}
@@ -241,13 +251,13 @@ public:
void AArch64MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
- // For each of the SUnits in the scheduling block, try to fuse the instruction
- // in it with one in its successors.
- for (SUnit &ASU : DAG->SUnits)
- scheduleAdjacentImpl(DAG, &ASU, ASU.Succs, false);
+ // For each of the SUnits in the scheduling block, try to fuse the instr in it
+ // with one in its successors.
+ for (SUnit &ISU : DAG->SUnits)
+ scheduleAdjacentImpl(DAG, ISU);
- // Try to fuse the instruction in the ExitSU with one in its predecessors.
- scheduleAdjacentImpl(DAG, &DAG->ExitSU, DAG->ExitSU.Preds, true);
+ // Try to fuse the instr in the ExitSU with one in its predecessors.
+ scheduleAdjacentImpl(DAG, DAG->ExitSU);
}
} // end namespace
OpenPOWER on IntegriCloud