diff options
author | Sanjay Patel <spatel@rotateright.com> | 2015-06-23 00:39:40 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2015-06-23 00:39:40 +0000 |
commit | e79b43a01f9022877b0f7d616cb2145d647b34b7 (patch) | |
tree | 459eac0b0f45672636b6b85d3b6e71ff706508f4 /llvm/lib/CodeGen/MachineCombiner.cpp | |
parent | 3a7d44cb81245783d20076801900373cffefdb0d (diff) | |
download | bcm5719-llvm-e79b43a01f9022877b0f7d616cb2145d647b34b7.tar.gz bcm5719-llvm-e79b43a01f9022877b0f7d616cb2145d647b34b7.zip |
[x86] generalize reassociation optimization in machine combiner to 2 instructions
Currently ( D10321, http://reviews.llvm.org/rL239486 ), we can use the machine combiner pass
to reassociate the following sequence to reduce the critical path:
A = ? op ?
B = A op X
C = B op Y
-->
A = ? op ?
B = X op Y
C = A op B
'op' is currently limited to x86 AVX scalar FP adds (with fast-math on), but in theory, it could
be any associative math/logic op (see TODO in code comment).
This patch generalizes the pattern match to ignore the instruction that defines 'A'. So instead of
a sequence of 3 adds, we now only need to find 2 dependent adds and decide if it's worth
reassociating them.
This generalization has a compile-time cost because we can now match more instruction sequences
and we rely more heavily on the machine combiner to discard sequences where reassociation doesn't
improve the critical path.
For example, in the new test case:
A = M div N
B = A add X
C = B add Y
We'll match 2 reassociation patterns, but this transform doesn't reduce the critical path:
A = M div N
B = A add Y
C = B add X
We need the combiner to reject that pattern but select this:
A = M div N
B = X add Y
C = B add A
Differential Revision: http://reviews.llvm.org/D10460
llvm-svn: 240361
Diffstat (limited to 'llvm/lib/CodeGen/MachineCombiner.cpp')
-rw-r--r-- | llvm/lib/CodeGen/MachineCombiner.cpp | 49 |
1 files changed, 31 insertions, 18 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp index 5019e8eef19..66b9fce4e46 100644 --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -67,10 +67,11 @@ private: unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace); bool - preservesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg); + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + bool NewCodeHasLessInsts); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -208,19 +209,24 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, return NewRootLatency; } -/// True when the new instruction sequence does not -/// lengthen the critical path. The DAGCombine code sequence ends in MI -/// (Machine Instruction) Root. The new code sequence ends in MI NewRoot. A -/// necessary condition for the new sequence to replace the old sequence is that -/// it cannot lengthen the critical path. This is decided by the formula +/// True when the new instruction sequence does not lengthen the critical path +/// and the new sequence has less instructions or the new sequence improves the +/// critical path. +/// The DAGCombine code sequence ends in MI (Machine Instruction) Root. +/// The new code sequence ends in MI NewRoot. A necessary condition for the new +/// sequence to replace the old sequence is that it cannot lengthen the critical +/// path. This is decided by the formula: /// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). -/// The slack is the number of cycles Root can be delayed before the critical -/// patch becomes longer. -bool MachineCombiner::preservesCriticalPathLen( +/// If the new sequence has an equal length critical path but does not reduce +/// the number of instructions (NewCodeHasLessInsts is false), then it is not +/// considered an improvement. The slack is the number of cycles Root can be +/// delayed before the critical patch becomes longer. +bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl<MachineInstr *> &InsInstrs, - DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) { + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + bool NewCodeHasLessInsts) { assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); // NewRoot is the last instruction in the \p InsInstrs vector. @@ -245,9 +251,13 @@ bool MachineCombiner::preservesCriticalPathLen( dbgs() << " RootDepth + RootLatency + RootSlack " << RootDepth + RootLatency + RootSlack << "\n";); - /// True when the new sequence does not lengthen the critical path. - return ((NewRootDepth + NewRootLatency) <= - (RootDepth + RootLatency + RootSlack)); + unsigned NewCycleCount = NewRootDepth + NewRootLatency; + unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; + + if (NewCodeHasLessInsts) + return NewCycleCount <= OldCycleCount; + else + return NewCycleCount < OldCycleCount; } /// helper routine to convert instructions into SC @@ -359,18 +369,21 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { Traces->verifyAnalysis(); TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); // Found pattern, but did not generate alternative sequence. // This can happen e.g. when an immediate could not be materialized // in a single instruction. - if (!InsInstrs.size()) + if (!NewInstCount) continue; // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases // resource pressure. - if (doSubstitute(InsInstrs.size(), DelInstrs.size()) || - (preservesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg) && + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, + NewInstCount < OldInstCount) && preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); |