diff options
author | Florian Hahn <florian.hahn@arm.com> | 2017-05-23 09:33:34 +0000 |
---|---|---|
committer | Florian Hahn <florian.hahn@arm.com> | 2017-05-23 09:33:34 +0000 |
commit | abb4218b988f6051cfc466a402b12dd91fb27c43 (patch) | |
tree | 6096567314b53fd92a9124d6a1e28154955a210a /llvm/lib | |
parent | 617be6e47596306a03b22f718dab00d986ac74a2 (diff) | |
download | bcm5719-llvm-abb4218b988f6051cfc466a402b12dd91fb27c43.tar.gz bcm5719-llvm-abb4218b988f6051cfc466a402b12dd91fb27c43.zip |
[AArch64] Make instruction fusion more aggressive.
Summary:
This patch makes instruction fusion more aggressive by
* adding artificial edges between the successors of FirstSU and
SecondSU, similar to BaseMemOpClusterMutation::clusterNeighboringMemOps.
* updating PostGenericScheduler::tryCandidate to keep clusters together,
similar to GenericScheduler::tryCandidate.
This change increases the number of AES instruction pairs generated on
Cortex-A57 and Cortex-A72. This doesn't change code at all in
most benchmarks or general code, but we've seen improvement on kernels
using AESE/AESMC and AESD/AESIMC.
Reviewers: evandro, kristof.beyls, t.p.northover, silviu.baranga, atrick, rengolin, MatzeB
Reviewed By: evandro
Subscribers: aemerson, rengolin, MatzeB, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D33230
llvm-svn: 303618
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/MachineScheduler.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64MacroFusion.cpp | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 2 |
3 files changed, 20 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 41e161f71e5..72b7ad47e09 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3233,6 +3233,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) return; + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), + TryCand, Cand, Cluster)) + return; + // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index a6926a6700e..3b71d529db5 100644 --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -232,6 +232,19 @@ static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); + if (&SecondSU != &DAG->ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and SecondSU. 
+ for (SUnit::const_succ_iterator + SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); + SI != SE; ++SI) { + if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); + DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + ++NumFused; return true; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 132f192f2a9..82b1ff7b1fa 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -277,7 +277,7 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>(); - if (ST.hasFuseLiterals()) { + if (ST.hasFuseAES() || ST.hasFuseLiterals()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); |