diff options
| author | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
|---|---|---|
| committer | Evan Cheng <evan.cheng@apple.com> | 2010-11-03 00:45:17 +0000 |
| commit | debf9c502a76715d788051667b8bd8b2adc2ec86 (patch) | |
| tree | 130824858f83219eae819ab37b8d418ac23faed8 /llvm/lib/CodeGen | |
| parent | 634ab6c2b7f66cb8b744edfc6b9c11c83a4c40c3 (diff) | |
| download | bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.tar.gz bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.zip | |
Two sets of changes. Sorry they are intermingled.
1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to
"optimize for latency". Call instructions don't have the right latency and
this is more likely to use introduce spills.
2. Fix if-converter cost function. For ARM, it should use instruction latencies,
not # of micro-ops since multi-latency instructions is completely executed
even when the predicate is false. Also, some instruction will be "slower"
when they are predicated due to the register def becoming implicit input.
rdar://8598427
llvm-svn: 118135
Diffstat (limited to 'llvm/lib/CodeGen')
| -rw-r--r-- | llvm/lib/CodeGen/IfConversion.cpp | 59 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 11 |
4 files changed, 56 insertions, 30 deletions
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp index e90985bf899..c05aa40b674 100644 --- a/llvm/lib/CodeGen/IfConversion.cpp +++ b/llvm/lib/CodeGen/IfConversion.cpp @@ -93,7 +93,8 @@ namespace { /// ClobbersPred - True if BB could modify predicates (e.g. has /// cmp, call, etc.) /// NonPredSize - Number of non-predicated instructions. - /// ExtraCost - Extra cost for microcoded instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated /// BB - Corresponding MachineBasicBlock. /// TrueBB / FalseBB- See AnalyzeBranch(). /// BrCond - Conditions for end of block conditional branches. @@ -110,6 +111,7 @@ namespace { bool ClobbersPred : 1; unsigned NonPredSize; unsigned ExtraCost; + unsigned ExtraCost2; MachineBasicBlock *BB; MachineBasicBlock *TrueBB; MachineBasicBlock *FalseBB; @@ -119,7 +121,7 @@ namespace { IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), HasFallThrough(false), IsUnpredicable(false), CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), - ExtraCost(0), BB(0), TrueBB(0), FalseBB(0) {} + ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {} }; /// IfcvtToken - Record information about pending if-conversions to attempt: @@ -203,17 +205,20 @@ namespace { bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size, + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, float Prediction, float Confidence) const { - return Size > 0 && TII->isProfitableToIfCvt(BB, Size, - Prediction, Confidence); + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction, Confidence); } - bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, - MachineBasicBlock &FBB, unsigned FSize, + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, float Prediction, float Confidence) const { - return TSize > 0 && FSize > 0 && - TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize, + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, Prediction, Confidence); } @@ -649,6 +654,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // Then scan all the instructions. BBI.NonPredSize = 0; BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { @@ -665,9 +671,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (!isCondBr) { if (!isPredicated) { BBI.NonPredSize++; - unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins); - if (NumOps > 1) - BBI.ExtraCost += NumOps-1; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. @@ -815,9 +824,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + - TrueBBI.ExtraCost), + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + - FalseBBI.ExtraCost), + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { @@ -836,7 +845,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -851,7 +860,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; @@ -859,7 +868,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, - Prediction, Confidence) && + TrueBBI.ExtraCost2, Prediction, Confidence) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB @@ -878,7 +887,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; @@ -888,7 +897,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; @@ -897,7 +906,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) && MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize + FalseBBI.ExtraCost, - 1.0-Prediction, Confidence) && + FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -1427,9 +1436,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned NumOps = TII->getNumMicroOps(MI, InstrItins); - if (NumOps > 1) - ToBBI.ExtraCost += NumOps-1; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { @@ -1504,8 +1515,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.NonPredSize += FromBBI.NonPredSize; ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; FromBBI.NonPredSize = 0; FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.HasFallThrough = FromBBI.HasFallThrough; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index abd68caf122..e86a78c6919 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -238,6 +238,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { "Cannot schedule terminators or labels!"); // Create the SUnit for this MI. SUnit *SU = NewSUnit(MI); + SU->isCall = TID.isCall(); + SU->isCommutable = TID.isCommutable(); // Assign the Latency field of SU using target-provided information. if (UnitLatencies) @@ -564,9 +566,9 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { // extra time. if (SU->getInstr()->getDesc().mayLoad()) SU->Latency += 2; - } else - SU->Latency = - InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass()); + } else { + SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); + } } void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ea1aaa1e05c..9978d00f20f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1589,6 +1589,10 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for @@ -1648,6 +1652,10 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ bool ilp_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + bool LHigh = SPQ->HighRegPressure(left); bool RHigh = SPQ->HighRegPressure(right); // Avoid causing spills. If register pressure is high, schedule for diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 7d01bd31b96..429b1152b07 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; @@ -300,6 +301,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { N = N->getOperand(N->getNumOperands()-1).getNode(); assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; } // Scan down to find any flagged succs. @@ -316,6 +319,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; break; } if (!HasFlagUse) break; @@ -438,10 +443,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { // all nodes flagged together into this SUnit. SU->Latency = 0; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) - if (N->isMachineOpcode()) { - SU->Latency += InstrItins-> - getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); - } + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); } void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, |

