summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp278
-rw-r--r--llvm/test/CodeGen/X86/pr32610.ll40
2 files changed, 190 insertions, 128 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4f4025d8ae6..579112c9bfc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -226,6 +226,7 @@ private:
void UnscheduleNodeBottomUp(SUnit*);
void RestoreHazardCheckerBottomUp();
void BacktrackBottomUp(SUnit*, SUnit*);
+ SUnit *TryUnfoldSU(SUnit *);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
@@ -780,7 +781,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
-/// unscheduled, incrcease the succ left count of its predecessors. Remove
+/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
@@ -934,6 +935,146 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
return false;
}
+/// TryUnfold - Attempt to unfold
+SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
+ SDNode *N = SU->getNode();
+ // Use while over if to ease fall through.
+ SmallVector<SDNode *, 2> NewNodes;
+ if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ return nullptr;
+
+ // unfolding an x86 DEC64m operation results in store, dec, load which
+ // can't be handled here so quit
+ if (NewNodes.size() == 3)
+ return nullptr;
+
+ assert(NewNodes.size() == 2 && "Expected a load folding node!");
+
+ N = NewNodes[1];
+ SDNode *LoadNode = NewNodes[0];
+ unsigned NumVals = N->getNumValues();
+ unsigned OldNumVals = SU->getNode()->getNumValues();
+
+ // LoadNode may already exist. This can happen when there is another
+ // load from the same location and producing the same type of value
+ // but it has different alignment or volatileness.
+ bool isNewLoad = true;
+ SUnit *LoadSU;
+ if (LoadNode->getNodeId() != -1) {
+ LoadSU = &SUnits[LoadNode->getNodeId()];
+ // If LoadSU has already been scheduled, we should clone it but
+ // this would negate the benefit to unfolding so just return SU.
+ if (LoadSU->isScheduled)
+ return SU;
+ isNewLoad = false;
+ } else {
+ LoadSU = CreateNewSUnit(LoadNode);
+ LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
+ computeLatency(LoadSU);
+ }
+
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
+
+ // Now that we are committed to unfolding replace DAG Uses.
+ for (unsigned i = 0; i != NumVals; ++i)
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
+ DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1),
+ SDValue(LoadNode, 1));
+
+ SUnit *NewSU = CreateNewSUnit(N);
+ assert(N->getNodeId() == -1 && "Node already inserted!");
+ N->setNodeId(NewSU->NodeNum);
+
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
+ NewSU->isTwoAddress = true;
+ break;
+ }
+ }
+ if (MCID.isCommutable())
+ NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
+ computeLatency(NewSU);
+
+ // Record all the edges to and from the old SU, by category.
+ SmallVector<SDep, 4> ChainPreds;
+ SmallVector<SDep, 4> ChainSuccs;
+ SmallVector<SDep, 4> LoadPreds;
+ SmallVector<SDep, 4> NodePreds;
+ SmallVector<SDep, 4> NodeSuccs;
+ for (SDep &Pred : SU->Preds) {
+ if (Pred.isCtrl())
+ ChainPreds.push_back(Pred);
+ else if (isOperandOf(Pred.getSUnit(), LoadNode))
+ LoadPreds.push_back(Pred);
+ else
+ NodePreds.push_back(Pred);
+ }
+ for (SDep &Succ : SU->Succs) {
+ if (Succ.isCtrl())
+ ChainSuccs.push_back(Succ);
+ else
+ NodeSuccs.push_back(Succ);
+ }
+
+ // Now assign edges to the newly-created nodes.
+ for (const SDep &Pred : ChainPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : LoadPreds) {
+ RemovePred(SU, Pred);
+ if (isNewLoad)
+ AddPred(LoadSU, Pred);
+ }
+ for (const SDep &Pred : NodePreds) {
+ RemovePred(SU, Pred);
+ AddPred(NewSU, Pred);
+ }
+ for (SDep D : NodeSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ D.setSUnit(NewSU);
+ AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
+ !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
+ }
+ for (SDep D : ChainSuccs) {
+ SUnit *SuccDep = D.getSUnit();
+ D.setSUnit(SU);
+ RemovePred(SuccDep, D);
+ if (isNewLoad) {
+ D.setSUnit(LoadSU);
+ AddPred(SuccDep, D);
+ }
+ }
+
+ // Add a data dependency to reflect that NewSU reads the value defined
+ // by LoadSU.
+ SDep D(LoadSU, SDep::Data, 0);
+ D.setLatency(LoadSU->Latency);
+ AddPred(NewSU, D);
+
+ if (isNewLoad)
+ AvailableQueue->addNode(LoadSU);
+ AvailableQueue->addNode(NewSU);
+
+ ++NumUnfolds;
+
+ if (NewSU->NumSuccsLeft == 0)
+ NewSU->isAvailable = true;
+
+ return NewSU;
+}
+
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
@@ -959,135 +1100,16 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return nullptr;
}
+ // If possible unfold instruction.
if (TryUnfold) {
- SmallVector<SDNode*, 2> NewNodes;
- if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
+ SUnit *UnfoldSU = TryUnfoldSU(SU);
+ if (!UnfoldSU)
return nullptr;
-
- // unfolding an x86 DEC64m operation results in store, dec, load which
- // can't be handled here so quit
- if (NewNodes.size() == 3)
- return nullptr;
-
- DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
- assert(NewNodes.size() == 2 && "Expected a load folding node!");
-
- N = NewNodes[1];
- SDNode *LoadNode = NewNodes[0];
- unsigned NumVals = N->getNumValues();
- unsigned OldNumVals = SU->getNode()->getNumValues();
- for (unsigned i = 0; i != NumVals; ++i)
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
- DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
- SDValue(LoadNode, 1));
-
- // LoadNode may already exist. This can happen when there is another
- // load from the same location and producing the same type of value
- // but it has different alignment or volatileness.
- bool isNewLoad = true;
- SUnit *LoadSU;
- if (LoadNode->getNodeId() != -1) {
- LoadSU = &SUnits[LoadNode->getNodeId()];
- isNewLoad = false;
- } else {
- LoadSU = CreateNewSUnit(LoadNode);
- LoadNode->setNodeId(LoadSU->NodeNum);
-
- InitNumRegDefsLeft(LoadSU);
- computeLatency(LoadSU);
- }
-
- SUnit *NewSU = CreateNewSUnit(N);
- assert(N->getNodeId() == -1 && "Node already inserted!");
- N->setNodeId(NewSU->NodeNum);
-
- const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
- if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
- NewSU->isTwoAddress = true;
- break;
- }
- }
- if (MCID.isCommutable())
- NewSU->isCommutable = true;
-
- InitNumRegDefsLeft(NewSU);
- computeLatency(NewSU);
-
- // Record all the edges to and from the old SU, by category.
- SmallVector<SDep, 4> ChainPreds;
- SmallVector<SDep, 4> ChainSuccs;
- SmallVector<SDep, 4> LoadPreds;
- SmallVector<SDep, 4> NodePreds;
- SmallVector<SDep, 4> NodeSuccs;
- for (SDep &Pred : SU->Preds) {
- if (Pred.isCtrl())
- ChainPreds.push_back(Pred);
- else if (isOperandOf(Pred.getSUnit(), LoadNode))
- LoadPreds.push_back(Pred);
- else
- NodePreds.push_back(Pred);
- }
- for (SDep &Succ : SU->Succs) {
- if (Succ.isCtrl())
- ChainSuccs.push_back(Succ);
- else
- NodeSuccs.push_back(Succ);
- }
-
- // Now assign edges to the newly-created nodes.
- for (const SDep &Pred : ChainPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : LoadPreds) {
- RemovePred(SU, Pred);
- if (isNewLoad)
- AddPred(LoadSU, Pred);
- }
- for (const SDep &Pred : NodePreds) {
- RemovePred(SU, Pred);
- AddPred(NewSU, Pred);
- }
- for (SDep D : NodeSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- D.setSUnit(NewSU);
- AddPred(SuccDep, D);
- // Balance register pressure.
- if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
- && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
- --NewSU->NumRegDefsLeft;
- }
- for (SDep D : ChainSuccs) {
- SUnit *SuccDep = D.getSUnit();
- D.setSUnit(SU);
- RemovePred(SuccDep, D);
- if (isNewLoad) {
- D.setSUnit(LoadSU);
- AddPred(SuccDep, D);
- }
- }
-
- // Add a data dependency to reflect that NewSU reads the value defined
- // by LoadSU.
- SDep D(LoadSU, SDep::Data, 0);
- D.setLatency(LoadSU->Latency);
- AddPred(NewSU, D);
-
- if (isNewLoad)
- AvailableQueue->addNode(LoadSU);
- AvailableQueue->addNode(NewSU);
-
- ++NumUnfolds;
-
- if (NewSU->NumSuccsLeft == 0) {
- NewSU->isAvailable = true;
- return NewSU;
- }
- SU = NewSU;
+ SU = UnfoldSU;
+ N = SU->getNode();
+ // If this can be scheduled don't bother duplicating and just return
+ if (SU->NumSuccsLeft == 0)
+ return SU;
}
DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
diff --git a/llvm/test/CodeGen/X86/pr32610.ll b/llvm/test/CodeGen/X86/pr32610.ll
new file mode 100644
index 00000000000..1116cf6f1b2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr32610.ll
@@ -0,0 +1,40 @@
+; RUN: llc -o - %s | FileCheck %s
+
+; CHECK-LABEL: @pr32610
+; CHECK: movl L_b$non_lazy_ptr, [[BASEREG:%[a-z]+]]
+; CHECK: cmpl ([[BASEREG]]), {{%[a-z]+}}
+; CHECK: cmpl ([[BASEREG]]), {{%[a-z]+}}
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+target triple = "i386-apple-macosx10.13.0"
+
+@c = external local_unnamed_addr global i32, align 4
+@b = external local_unnamed_addr global [1 x i32], align 4
+@d = external local_unnamed_addr global i32, align 4
+
+; Function Attrs: norecurse nounwind optsize ssp
+define void @pr32610() local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i32 0, i32 undef), align 4, !tbaa !1
+ %cmp = icmp eq i32 undef, %0
+ %conv = zext i1 %cmp to i32
+ %tobool1.i = icmp ne i32 undef, 0
+ %or.cond.i = and i1 %cmp, %tobool1.i
+ %cond.i = select i1 %or.cond.i, i32 %conv, i32 undef
+ store i32 %cond.i, i32* @c, align 4, !tbaa !1
+ %1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @b, i32 0, i32 0), align 4
+ %tobool = icmp ne i32 %1, 0
+ %2 = select i1 %tobool, i32 %1, i32 undef
+ store i32 %2, i32* @d, align 4, !tbaa !1
+ ret void
+}
+
+attributes #0 = { norecurse nounwind optsize ssp "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 301507) (llvm/trunk 301505)"}
+!1 = !{!2, !2, i64 0}
+!2 = !{!"int", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
OpenPOWER on IntegriCloud