Two sets of changes. Sorry they are intermingled.

1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to "optimize for latency". Call instructions don't have the right latency and this is more likely to use introduce spills. 2. Fix if-converter cost function. For ARM, it should use instruction latencies, not # of micro-ops since multi-latency instructions is completely executed even when the predicate is false. Also, some instruction will be "slower" when they are predicated due to the register def becoming implicit input. rdar://8598427 llvm-svn: 118135
author: Evan Cheng <evan.cheng@apple.com> 2010-11-03 00:45:17 +0000
committer: Evan Cheng <evan.cheng@apple.com> 2010-11-03 00:45:17 +0000
commit: debf9c502a76715d788051667b8bd8b2adc2ec86 (patch)
tree: 130824858f83219eae819ab37b8d418ac23faed8 /llvm/lib/CodeGen
parent: 634ab6c2b7f66cb8b744edfc6b9c11c83a4c40c3 (diff)
download: bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.tar.gz
bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.zip
4 files changed, 56 insertions, 30 deletions
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index e90985bf899..c05aa40b674 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -93,7 +93,8 @@ namespace {
     /// ClobbersPred    - True if BB could modify predicates (e.g. has
     ///                   cmp, call, etc.)
     /// NonPredSize     - Number of non-predicated instructions.
-    /// ExtraCost       - Extra cost for microcoded instructions.
+    /// ExtraCost       - Extra cost for multi-cycle instructions.
+    /// ExtraCost2      - Some instructions are slower when predicated
     /// BB              - Corresponding MachineBasicBlock.
     /// TrueBB / FalseBB- See AnalyzeBranch().
     /// BrCond          - Conditions for end of block conditional branches.
@@ -110,6 +111,7 @@ namespace {
       bool ClobbersPred    : 1;
       unsigned NonPredSize;
       unsigned ExtraCost;
+      unsigned ExtraCost2;
       MachineBasicBlock *BB;
       MachineBasicBlock *TrueBB;
       MachineBasicBlock *FalseBB;
@@ -119,7 +121,7 @@ namespace {
                  IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
                  HasFallThrough(false), IsUnpredicable(false),
                  CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
-                 ExtraCost(0), BB(0), TrueBB(0), FalseBB(0) {}
+                 ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
     };
 
     /// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -203,17 +205,20 @@ namespace {
                                bool IgnoreBr = false);
     void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size,
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+                            unsigned Cycle, unsigned Extra,
                             float Prediction, float Confidence) const {
-      return Size > 0 && TII->isProfitableToIfCvt(BB, Size,
-                                                  Prediction, Confidence);
+      return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+                                                   Prediction, Confidence);
     }
 
-    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
-                            MachineBasicBlock &FBB, unsigned FSize,
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+                            unsigned TCycle, unsigned TExtra,
+                            MachineBasicBlock &FBB,
+                            unsigned FCycle, unsigned FExtra,
                             float Prediction, float Confidence) const {
-      return TSize > 0 && FSize > 0 &&
-        TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize,
+      return TCycle > 0 && FCycle > 0 &&
+        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
                                  Prediction, Confidence);
     }
 
@@ -649,6 +654,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
   // Then scan all the instructions.
   BBI.NonPredSize = 0;
   BBI.ExtraCost = 0;
+  BBI.ExtraCost2 = 0;
   BBI.ClobbersPred = false;
   for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
        I != E; ++I) {
@@ -665,9 +671,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     if (!isCondBr) {
       if (!isPredicated) {
         BBI.NonPredSize++;
-        unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins);
-        if (NumOps > 1)
-          BBI.ExtraCost += NumOps-1;
+        unsigned ExtraPredCost = 0;
+        unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+                                                  &ExtraPredCost);
+        if (NumCycles > 1)
+          BBI.ExtraCost += NumCycles-1;
+        BBI.ExtraCost2 += ExtraPredCost;
       } else if (!AlreadyPredicated) {
         // FIXME: This instruction is already predicated before the
         // if-conversion pass. It's probably something like a conditional move.
@@ -815,9 +824,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
   
   if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
-                                       TrueBBI.ExtraCost),
+                                       TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
                          *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
-                                        FalseBBI.ExtraCost),
+                                        FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
                          Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
       FeasibilityAnalysis(FalseBBI, RevCond)) {
@@ -836,7 +845,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 
   if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
     // Triangle:
     //   EBB
@@ -851,7 +860,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 
   if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
     Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
     Enqueued = true;
@@ -859,7 +868,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
 
   if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
       MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
-                         Prediction, Confidence) &&
+                         TrueBBI.ExtraCost2, Prediction, Confidence) &&
       FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
     // Simple (split, no rejoin):
     //   EBB
@@ -878,7 +887,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
                       1.0-Prediction, Confidence) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
       Enqueued = true;
@@ -888,7 +897,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
                       1.0-Prediction, Confidence) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
       Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
       Enqueued = true;
@@ -897,7 +906,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
     if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
         MeetIfcvtSizeLimit(*FalseBBI.BB,
                            FalseBBI.NonPredSize + FalseBBI.ExtraCost,
-                           1.0-Prediction, Confidence) &&
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
         FeasibilityAnalysis(FalseBBI, RevCond)) {
       Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
       Enqueued = true;
@@ -1427,9 +1436,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
     MachineInstr *MI = MF.CloneMachineInstr(I);
     ToBBI.BB->insert(ToBBI.BB->end(), MI);
     ToBBI.NonPredSize++;
-    unsigned NumOps = TII->getNumMicroOps(MI, InstrItins);
-    if (NumOps > 1)
-      ToBBI.ExtraCost += NumOps-1;
+    unsigned ExtraPredCost = 0;
+    unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+    if (NumCycles > 1)
+      ToBBI.ExtraCost += NumCycles-1;
+    ToBBI.ExtraCost2 += ExtraPredCost;
 
     if (!TII->isPredicated(I) && !MI->isDebugValue()) {
       if (!TII->PredicateInstruction(MI, Cond)) {
@@ -1504,8 +1515,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
 
   ToBBI.NonPredSize += FromBBI.NonPredSize;
   ToBBI.ExtraCost += FromBBI.ExtraCost;
+  ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
   FromBBI.NonPredSize = 0;
   FromBBI.ExtraCost = 0;
+  FromBBI.ExtraCost2 = 0;
 
   ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
   ToBBI.HasFallThrough = FromBBI.HasFallThrough;
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index abd68caf122..e86a78c6919 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -238,6 +238,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
            "Cannot schedule terminators or labels!");
     // Create the SUnit for this MI.
     SUnit *SU = NewSUnit(MI);
+    SU->isCall = TID.isCall();
+    SU->isCommutable = TID.isCommutable();
 
     // Assign the Latency field of SU using target-provided information.
     if (UnitLatencies)
@@ -564,9 +566,9 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
     // extra time.
     if (SU->getInstr()->getDesc().mayLoad())
       SU->Latency += 2;
-  } else
-    SU->Latency =
-      InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+  } else {
+    SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+  }
 }
 
 void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, 
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index ea1aaa1e05c..9978d00f20f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1589,6 +1589,10 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
 }
 
 bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
   bool LHigh = SPQ->HighRegPressure(left);
   bool RHigh = SPQ->HighRegPressure(right);
   // Avoid causing spills. If register pressure is high, schedule for
@@ -1648,6 +1652,10 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
 
 bool ilp_ls_rr_sort::operator()(const SUnit *left,
                                 const SUnit *right) const {
+  if (left->isCall || right->isCall)
+    // No way to compute latency of calls.
+    return BURRSort(left, right, SPQ);
+
   bool LHigh = SPQ->HighRegPressure(left);
   bool RHigh = SPQ->HighRegPressure(right);
   // Avoid causing spills. If register pressure is high, schedule for
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 7d01bd31b96..429b1152b07 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
   SUnit *SU = NewSUnit(Old->getNode());
   SU->OrigNode = Old->OrigNode;
   SU->Latency = Old->Latency;
+  SU->isCall = Old->isCall;
   SU->isTwoAddress = Old->isTwoAddress;
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -300,6 +301,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
       N = N->getOperand(N->getNumOperands()-1).getNode();
       assert(N->getNodeId() == -1 && "Node already inserted!");
       N->setNodeId(NodeSUnit->NodeNum);
+      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+        NodeSUnit->isCall = true;
     }
     
     // Scan down to find any flagged succs.
@@ -316,6 +319,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
           assert(N->getNodeId() == -1 && "Node already inserted!");
           N->setNodeId(NodeSUnit->NodeNum);
           N = *UI;
+          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+            NodeSUnit->isCall = true;
           break;
         }
       if (!HasFlagUse) break;
@@ -438,10 +443,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   // all nodes flagged together into this SUnit.
   SU->Latency = 0;
   for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
-    if (N->isMachineOpcode()) {
-      SU->Latency += InstrItins->
-        getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
-    }
+    if (N->isMachineOpcode())
+      SU->Latency += TII->getInstrLatency(InstrItins, N);
 }
 
 void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
author	Evan Cheng <evan.cheng@apple.com>	2010-11-03 00:45:17 +0000
committer	Evan Cheng <evan.cheng@apple.com>	2010-11-03 00:45:17 +0000
commit	debf9c502a76715d788051667b8bd8b2adc2ec86 (patch)
tree	130824858f83219eae819ab37b8d418ac23faed8 /llvm/lib/CodeGen
parent	634ab6c2b7f66cb8b744edfc6b9c11c83a4c40c3 (diff)
download	bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.tar.gz bcm5719-llvm-debf9c502a76715d788051667b8bd8b2adc2ec86.zip