summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorFlorian Hahn <florian.hahn@arm.com>2017-05-23 09:33:34 +0000
committerFlorian Hahn <florian.hahn@arm.com>2017-05-23 09:33:34 +0000
commitabb4218b988f6051cfc466a402b12dd91fb27c43 (patch)
tree6096567314b53fd92a9124d6a1e28154955a210a /llvm/lib
parent617be6e47596306a03b22f718dab00d986ac74a2 (diff)
downloadbcm5719-llvm-abb4218b988f6051cfc466a402b12dd91fb27c43.tar.gz
bcm5719-llvm-abb4218b988f6051cfc466a402b12dd91fb27c43.zip
[AArch64] Make instruction fusion more aggressive.
Summary:
This patch makes instruction fusion more aggressive by:
  * adding artificial edges between the successors of FirstSU and SecondSU, similar to BaseMemOpClusterMutation::clusterNeighboringMemOps;
  * updating PostGenericScheduler::tryCandidate to keep clusters together, similar to GenericScheduler::tryCandidate.

This change increases the number of AES instruction pairs generated on Cortex-A57 and Cortex-A72. This doesn't change code at all in most benchmarks or general code, but we've seen improvement on kernels using AESE/AESMC and AESD/AESIMC.

Reviewers: evandro, kristof.beyls, t.p.northover, silviu.baranga, atrick, rengolin, MatzeB
Reviewed By: evandro
Subscribers: aemerson, rengolin, MatzeB, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D33230
llvm-svn: 303618
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp6
-rw-r--r--llvm/lib/Target/AArch64/AArch64MacroFusion.cpp13
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp2
3 files changed, 20 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 41e161f71e5..72b7ad47e09 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -3233,6 +3233,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
return;
+ // Keep clustered nodes together.
+ if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
+ Cand.SU == DAG->getNextClusterSucc(),
+ TryCand, Cand, Cluster))
+ return;
+
// Avoid critical resource consumption and balance the schedule.
if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
TryCand, Cand, ResourceReduce))
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index a6926a6700e..3b71d529db5 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -232,6 +232,19 @@ static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) {
dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " <<
DAG->TII->getName(SecondMI->getOpcode()) << '\n'; );
+ if (&SecondSU != &DAG->ExitSU)
+ // Make instructions dependent on FirstSU also dependent on SecondSU to
+ // prevent them from being scheduled between FirstSU and SecondSU.
+ for (SUnit::const_succ_iterator
+ SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end();
+ SI != SE; ++SI) {
+ if (!SI->getSUnit() || SI->getSUnit() == &SecondSU)
+ continue;
+ DEBUG(dbgs() << " Copy Succ ";
+ SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';);
+ DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial));
+ }
+
++NumFused;
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 132f192f2a9..82b1ff7b1fa 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -277,7 +277,7 @@ public:
ScheduleDAGInstrs *
createPostMachineScheduler(MachineSchedContext *C) const override {
const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
- if (ST.hasFuseLiterals()) {
+ if (ST.hasFuseAES() || ST.hasFuseLiterals()) {
// Run the Macro Fusion after RA again since literals are expanded from
// pseudos then (v. addPreSched2()).
ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
OpenPOWER on IntegriCloud