summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp70
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp52
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h5
-rw-r--r--llvm/test/CodeGen/Hexagon/swp-order-copies.ll37
5 files changed, 157 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index d0616fd678b..20141f7f8d6 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -369,8 +369,9 @@ public:
/// Set the Minimum Initiation Interval for this schedule attempt.
void setMII(unsigned mii) { MII = mii; }
- MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
- bool UpdateDAG = false);
+ void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule);
+
+ void fixupRegisterOverlaps(std::deque<SUnit *> &Instrs);
/// Return the new base register that was stored away for the changed
/// instruction.
@@ -3390,9 +3391,8 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
/// Apply changes to the instruction if needed. The changes are need
/// to improve the scheduling and depend up on the final schedule.
-MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
- SMSchedule &Schedule,
- bool UpdateDAG) {
+void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule) {
SUnit *SU = getSUnit(MI);
DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
InstrChanges.find(SU);
@@ -3400,7 +3400,7 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
std::pair<unsigned, int64_t> RegAndOffset = It->second;
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
- return nullptr;
+ return;
unsigned BaseReg = MI->getOperand(BasePos).getReg();
MachineInstr *LoopDef = findDefInLoop(BaseReg);
int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
@@ -3418,15 +3418,11 @@ MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
int64_t NewOffset =
MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
NewMI->getOperand(OffsetPos).setImm(NewOffset);
- if (UpdateDAG) {
- SU->setInstr(NewMI);
- MISUnitMap[NewMI] = SU;
- }
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
NewMIs.insert(NewMI);
- return NewMI;
}
}
- return nullptr;
}
/// Return true for an order dependence that is loop carried potentially.
@@ -3872,6 +3868,53 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
return true;
}
+/// Attempt to fix the degenerate cases when the instruction serialization
+/// causes the register lifetimes to overlap. For example,
+/// p' = store_pi(p, b)
+/// = load p, offset
+/// Here p and p' overlap, so two registers are needed. Instead, a load that is
+/// recorded in InstrChanges is changed in place to use p' with an updated offset.
+void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
+ unsigned OverlapReg = 0;
+ unsigned NewBaseReg = 0;
+ for (SUnit *SU : Instrs) {
+ MachineInstr *MI = SU->getInstr();
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ // Look for an instruction that uses p and occurs later in the serialized
+ // order of this cycle. NOTE(review): if OverlapReg is 0, uses of reg 0 match.
+ if (MO.isReg() && MO.isUse() && MO.getReg() == OverlapReg) {
+ // Check that the instruction appears in the InstrChanges structure,
+ // which contains instructions that can have the offset updated.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ unsigned BasePos, OffsetPos;
+ // Use p' as the base and subtract the offset recorded in InstrChanges.
+ if (TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos)) {
+ MI->getOperand(BasePos).setReg(NewBaseReg);
+ int64_t Offset = MI->getOperand(OffsetPos).getImm();
+ MI->getOperand(OffsetPos).setImm(Offset - It->second.second);
+ }
+ }
+ OverlapReg = 0;
+ NewBaseReg = 0;
+ break;
+ }
+ // Look for an instruction of the form p' = op(p), which uses and defines
+ // two virtual registers that get allocated to the same physical register.
+ unsigned TiedUseIdx = 0;
+ if (MI->isRegTiedToUseOperand(i, &TiedUseIdx)) {
+ // OverlapReg is p in the example above.
+ OverlapReg = MI->getOperand(TiedUseIdx).getReg();
+ // NewBaseReg is p' in the example above.
+ NewBaseReg = MI->getOperand(i).getReg();
+ break;
+ }
+ }
+ }
+}
+
/// After the schedule has been formed, call this function to combine
/// the instructions from the different stages/cycles. That is, this
/// function creates a schedule that represents a single iteration.
@@ -3932,7 +3975,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// map. We need to use the new registers to create the correct order.
for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
SUnit *SU = &SSD->SUnits[i];
- SSD->applyInstrChange(SU->getInstr(), *this, true);
+ SSD->applyInstrChange(SU->getInstr(), *this);
}
// Reorder the instructions in each cycle to fix and improve the
@@ -3956,6 +3999,7 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
// Replace the old order with the new order.
cycleInstrs.swap(newOrderZC);
cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ SSD->fixupRegisterOverlaps(cycleInstrs);
}
DEBUG(dump(););
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index bcd1a5089c7..c2125bec3a5 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -1651,8 +1651,13 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
int &Value) const {
if (isPostIncrement(MI)) {
+ // For a post-increment, the offset is zero and the increment value is
+ // determined by the instruction's access size.
+ int Zero;
unsigned AccessSize;
- return getBaseAndOffset(MI, Value, AccessSize);
+ bool RetVal = getBaseAndOffset(MI, Zero, AccessSize);
+ Value = (int) AccessSize;
+ return RetVal;
}
if (MI.getOpcode() == Hexagon::A2_addi) {
Value = MI.getOperand(2).getImm();
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 0f407c2d836..2555b50f91c 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -496,6 +496,48 @@ void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) {
Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
}
+/// Return true if the offset in SUI's instruction was advanced by the increment
+/// of SUJ's instruction so the two can be packetized; saves the old offset.
+bool HexagonPacketizerList::updateOffset(SUnit *SUI, SUnit *SUJ) {
+ assert(SUI->getInstr() && SUJ->getInstr());
+ MachineInstr &MI = *SUI->getInstr();
+ MachineInstr &MJ = *SUJ->getInstr();
+
+ unsigned BPI, OPI;
+ if (!HII->getBaseAndOffsetPosition(MI, BPI, OPI))
+ return false;
+ unsigned BPJ, OPJ;
+ if (!HII->getBaseAndOffsetPosition(MJ, BPJ, OPJ))
+ return false;
+ unsigned Reg = MI.getOperand(BPI).getReg();
+ if (Reg != MJ.getOperand(BPJ).getReg())
+ return false;
+ // Make sure that the dependences do not restrict adding MI to the packet.
+ // That is, ignore anti dependences, and make sure the only data dependence
+ // involves the specific register.
+ for (const auto &PI : SUI->Preds)
+ if (PI.getKind() != SDep::Anti &&
+ (PI.getKind() != SDep::Data || PI.getReg() != Reg))
+ return false;
+ int Incr;
+ if (!HII->getIncrementValue(MJ, Incr))
+ return false;
+
+ int64_t Offset = MI.getOperand(OPI).getImm();
+ MI.getOperand(OPI).setImm(Offset + Incr);
+ ChangedOffset = Offset;
+ return true;
+}
+
+/// Restore MI's offset from ChangedOffset (ChangedOffset itself is left as is).
+/// Needed if MI cannot be added to the current packet due to another instruction.
+void HexagonPacketizerList::undoChangedOffset(MachineInstr &MI) {
+ unsigned BP, OP;
+ if (!HII->getBaseAndOffsetPosition(MI, BP, OP))
+ llvm_unreachable("Unable to find base and offset operands.");
+ MI.getOperand(OP).setImm(ChangedOffset);
+}
+
enum PredicateKind {
PK_False,
PK_True,
@@ -980,6 +1022,7 @@ void HexagonPacketizerList::initPacketizerState() {
GlueToNewValueJump = false;
GlueAllocframeStore = false;
FoundSequentialDependence = false;
+ ChangedOffset = INT64_MAX;
}
// Ignore bundling of pseudo instructions.
@@ -1567,6 +1610,15 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
useCalleesSP(I);
GlueAllocframeStore = false;
}
+
+ if (ChangedOffset != INT64_MAX)
+ undoChangedOffset(I);
+ else if (updateOffset(SUI, SUJ)) {
+ FoundSequentialDependence = false;
+ Dependence = false;
+ return true;
+ }
+
return false;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 3a7bdf5101e..cbdd2367429 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -38,6 +38,9 @@ class HexagonPacketizerList : public VLIWPacketizerList {
// Has the feeder instruction been glued to new value jump.
bool GlueToNewValueJump;
+ // Original offset saved by updateOffset when pruning dependences; INT64_MAX when unset.
+ int64_t ChangedOffset;
+
// Check if there is a dependence between some instruction already in this
// packet and this instruction.
bool Dependence;
@@ -117,6 +120,8 @@ protected:
bool demoteToDotOld(MachineInstr &MI);
bool useCallersSP(MachineInstr &MI);
void useCalleesSP(MachineInstr &MI);
+ bool updateOffset(SUnit *SUI, SUnit *SUJ);
+ void undoChangedOffset(MachineInstr &MI);
bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2);
bool restrictingDepExistInPacket(MachineInstr&, unsigned);
bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC);
diff --git a/llvm/test/CodeGen/Hexagon/swp-order-copies.ll b/llvm/test/CodeGen/Hexagon/swp-order-copies.ll
new file mode 100644
index 00000000000..5de0717654f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-order-copies.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; Test that the instruction ordering code in the pipeliner fixes up dependences
+; between post-increment register definitions and uses so that the register
+; allocator does not allocate an additional register. The following test case
+; should generate a single packet.
+
+; CHECK: loop0(.LBB0_[[LOOP:.]],
+; CHECK: .LBB0_[[LOOP]]:
+; CHECK: {
+; CHECK-NOT: {
+; CHECK: :endloop0
+
+define void @test(i64* nocapture %v1, i64 %v2, i32 %len) local_unnamed_addr #0 {
+entry:
+ %cmp7 = icmp sgt i32 %len, 0 ; signed check that %len > 0
+ br i1 %cmp7, label %for.body, label %for.end ; skip the loop when %len <= 0
+
+for.body:
+ %arrayidx.phi = phi i64* [ %arrayidx.inc, %for.body ], [ %v1, %entry ] ; current element pointer, starts at %v1
+ %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] ; iteration counter, starts at 0
+ %0 = load i64, i64* %arrayidx.phi, align 8
+ %1 = tail call i64 @llvm.hexagon.M2.mmpyul.rs1(i64 %0, i64 %v2)
+ store i64 %1, i64* %arrayidx.phi, align 8 ; result goes back to the address it was loaded from
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, %len ; exit once %inc reaches %len
+ %arrayidx.inc = getelementptr i64, i64* %arrayidx.phi, i32 1 ; advance the pointer by one i64 element
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+declare i64 @llvm.hexagon.M2.mmpyul.rs1(i64, i64) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" }
+attributes #1 = { nounwind readnone }
OpenPOWER on IntegriCloud