summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/MacroFusion.cpp
diff options
context:
space:
mode:
authorQingShan Zhang <qshanz@cn.ibm.com>2019-12-04 04:58:34 +0000
committerQingShan Zhang <qshanz@cn.ibm.com>2019-12-04 05:05:35 +0000
commitd84b320dfd0a7dbedacc287ede5e5bc4c0f113ba (patch)
tree31802dcffa8b485b8e5c363049fd0b4277c0ae2d /llvm/lib/CodeGen/MacroFusion.cpp
parentf0ba1aec35d599353b6c5eca8286791b1c410b7c (diff)
downloadbcm5719-llvm-d84b320dfd0a7dbedacc287ede5e5bc4c0f113ba.tar.gz
bcm5719-llvm-d84b320dfd0a7dbedacc287ede5e5bc4c0f113ba.zip
[MacroFusion] Limit the max fused number as 2 to reduce the dependency
This is the example: int foo(int a, int b, int c, int d) { return a + b + c + d; } And this is the Dependency Graph: +------+ +------+ +------+ +------+ | A | | B | | C | | D | +--+--++ +---+--+ +--+---+ +--+---+ ^ ^ ^ ^ ^ ^ | | | | | | | | | |New1 +--------------+ | | | | | | | | | +--+---+ | |New2 | +-------+ ADD1 | | | | +--+---+ | | | Fuse ^ | | +-------------+ | +------------+ | | | Fuse +--+---+ +----------->+ ADD2 | | +------+ +--+---+ | ADD3 | +------+ We need also create an artificial edge from ADD1 to A if https://reviews.llvm.org/D69998 is landed. That will force the Node A scheduled before the ADD1 and ADD2. But in fact, it is ok to schedule the Node A in-between ADD3 and ADD2, as ADD3 and ADD2 are NOT a fusion pair because ADD2 has been matched to ADD1. We are creating these unnecessary dependency edges that override the heuristics. Differential Revision: https://reviews.llvm.org/D70066
Diffstat (limited to 'llvm/lib/CodeGen/MacroFusion.cpp')
-rw-r--r--llvm/lib/CodeGen/MacroFusion.cpp19
1 files changed, 18 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MacroFusion.cpp b/llvm/lib/CodeGen/MacroFusion.cpp
index d21eae222af..26cbc14166b 100644
--- a/llvm/lib/CodeGen/MacroFusion.cpp
+++ b/llvm/lib/CodeGen/MacroFusion.cpp
@@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) {
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
}
+static SUnit *getPredClusterSU(const SUnit &SU) {
+ for (const SDep &SI : SU.Preds)
+ if (SI.isCluster())
+ return SI.getSUnit();
+
+ return nullptr;
+}
+
+static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+ unsigned Num = 1;
+ const SUnit *CurrentSU = &SU;
+ while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
+ return Num < FuseLimit;
+}
+
static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
@@ -161,8 +176,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
if (DepSU.isBoundaryNode())
continue;
+ // Only chain two instructions together at most.
const MachineInstr *DepMI = DepSU.getInstr();
- if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+ if (!hasLessThanNumFused(DepSU, 2) ||
+ !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
continue;
if (fuseInstructionPair(DAG, DepSU, AnchorSU))
OpenPOWER on IntegriCloud