summaryrefslogtreecommitdiffstats
path: root/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
diff options
context:
space:
mode:
authorReid Kleckner <rnk@google.com>2018-03-14 21:54:21 +0000
committerReid Kleckner <rnk@google.com>2018-03-14 21:54:21 +0000
commit3a7a2e4a0ae44b6a035b09a95babe2bc6c646323 (patch)
tree170fd9cc4a0db0c5a54d565791a1fe4bac31a365 /llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
parente85b06d65f695c576df1e529ee37cb15e6902401 (diff)
downloadbcm5719-llvm-3a7a2e4a0ae44b6a035b09a95babe2bc6c646323.tar.gz
bcm5719-llvm-3a7a2e4a0ae44b6a035b09a95babe2bc6c646323.zip
[FastISel] Sink local value materializations to first use
Summary: Local values are constants, global addresses, and stack addresses that can't be folded into the instruction that uses them. For example, when storing the address of a global variable into memory, we need to materialize that address into a register. FastISel doesn't want to materialize any given local value more than once, so it generates all local value materialization code at EmitStartPt, which always dominates the current insertion point. This allows it to maintain a map of local value registers, and it knows that the local value area will always dominate the current insertion point. The downside is that local value instructions are always emitted without a source location. This is done to prevent jumpy line tables, but it means that the local value area will be considered part of the previous statement. Consider this C code: call1(); // line 1 ++global; // line 2 ++global; // line 3 call2(&global, &local); // line 4 Today we end up with assembly and line tables like this: .loc 1 1 callq call1 leaq global(%rip), %rdi leaq local(%rsp), %rsi .loc 1 2 addq $1, global(%rip) .loc 1 3 addq $1, global(%rip) .loc 1 4 callq call2 The LEA instructions in the local value area have no source location and are treated as being on line 1. Stepping through the code in a debugger and correlating it with the assembly won't make much sense, because these materializations are only required for line 4. This is actually problematic for the VS debugger "set next statement" feature, which effectively assumes that there are no registers live across statement boundaries. By sinking the local value code into the statement and fixing up the source location, we can make that feature work. This was filed as https://bugs.llvm.org/show_bug.cgi?id=35975 and https://crbug.com/793819. This change is obviously not enough to make this feature work reliably in all cases, but I felt that it was worth doing anyway because it usually generates smaller, more comprehensible -O0 code. I measured a 0.12% regression in code generation time with LLC on the sqlite3 amalgamation, so I think this is worth doing. There are some special cases worth calling out in the commit message: 1. local values materialized for phis 2. local values used by no-op casts 3. dead local value code Local values can be materialized for phis, and this does not show up as a vreg use in MachineRegisterInfo. In this case, if there are no other uses, this patch sinks the value to the first terminator, EH label, or the end of the BB if nothing else exists. Local values may also be used by no-op casts, which adds the register to the RegFixups table. Without reversing the RegFixups map direction, we don't have enough information to sink these instructions. Lastly, if the local value register has no other uses, we can delete it. This comes up when fastisel tries two instruction selection approaches and the first materializes the value but fails and the second succeeds without using the local value. Reviewers: aprantl, dblaikie, qcolombet, MatzeB, vsk, echristo Subscribers: dotdash, chandlerc, hans, sdardis, amccarth, javed.absar, zturner, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D43093 llvm-svn: 327581
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/FastISel.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp159
1 files changed, 156 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 0a420ae0145..b1aeb62f635 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -120,9 +120,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
STATISTIC(NumFastIselDead, "Number of dead insts removed on failure");
/// Set the current block to which generated machine instructions will be
-/// appended, and clear the local CSE map.
+/// appended.
void FastISel::startNewBlock() {
- LocalValueMap.clear();
+ assert(LocalValueMap.empty() &&
+ "local values should be cleared after finishing a BB");
// Instructions are appended to FuncInfo.MBB. If the basic block already
// contains labels or copies, use the last instruction as the last local
@@ -133,6 +134,9 @@ void FastISel::startNewBlock() {
LastLocalValue = EmitStartPt;
}
+/// Flush the local CSE map and sink anything we can.
+void FastISel::finishBasicBlock() { flushLocalValueMap(); }
+
bool FastISel::lowerArguments() {
if (!FuncInfo.CanLowerReturn)
// Fallback to SDISel argument lowering code to deal with sret pointer
@@ -153,13 +157,160 @@ bool FastISel::lowerArguments() {
return true;
}
+/// Return the defined register if this instruction defines exactly one
+/// virtual register and uses no other virtual registers. Otherwise return 0.
+static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
+ unsigned RegDef = 0;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (MO.isDef()) {
+ if (RegDef)
+ return 0;
+ RegDef = MO.getReg();
+ } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ // This is another use of a vreg. Don't try to sink it.
+ return 0;
+ }
+ }
+ return RegDef;
+}
+
void FastISel::flushLocalValueMap() {
+ // Try to sink local values down to their first use so that we can give them a
+ // better debug location. This has the side effect of shrinking local value
+ // live ranges, which helps out fast regalloc.
+ if (LastLocalValue != EmitStartPt) {
+ // Sink local value materialization instructions between EmitStartPt and
+ // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to
+ // avoid inserting into the range that we're iterating over.
+ MachineBasicBlock::reverse_iterator RE =
+ EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt)
+ : FuncInfo.MBB->rend();
+ MachineBasicBlock::reverse_iterator RI(LastLocalValue);
+
+ InstOrderMap OrderMap;
+ for (; RI != RE;) {
+ MachineInstr &LocalMI = *RI;
+ ++RI;
+ bool Store = true;
+ if (!LocalMI.isSafeToMove(nullptr, Store))
+ continue;
+ unsigned DefReg = findSinkableLocalRegDef(LocalMI);
+ if (DefReg == 0)
+ continue;
+
+ sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap);
+ }
+ }
+
LocalValueMap.clear();
LastLocalValue = EmitStartPt;
recomputeInsertPt();
SavedInsertPt = FuncInfo.InsertPt;
}
+static bool isRegUsedByPhiNodes(unsigned DefReg,
+ FunctionLoweringInfo &FuncInfo) {
+ for (auto &P : FuncInfo.PHINodesToUpdate)
+ if (P.second == DefReg)
+ return true;
+ return false;
+}
+
+/// Build a map of instruction orders. Return the first terminator and its
+/// order. Consider EH_LABEL instructions to be terminators as well, since local
+/// values for phis after invokes must be materialized before the call.
+void FastISel::InstOrderMap::initialize(MachineBasicBlock *MBB) {
+ unsigned Order = 0;
+ for (MachineInstr &I : *MBB) {
+ if (!FirstTerminator &&
+ (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) {
+ FirstTerminator = &I;
+ FirstTerminatorOrder = Order;
+ }
+ Orders[&I] = Order++;
+ }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+ unsigned DefReg,
+ InstOrderMap &OrderMap) {
+ // If this register is used by a register fixup, MRI will not contain all
+ // the uses until after register fixups, so don't attempt to sink or DCE
+ // this instruction. Register fixups typically come from no-op cast
+ // instructions, which replace the cast instruction vreg with the local
+ // value vreg.
+ if (FuncInfo.RegsWithFixups.count(DefReg))
+ return;
+
+ // We can DCE this instruction if there are no uses and it wasn't a
+ // materialized for a successor PHI node.
+ bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+ if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+ if (EmitStartPt == &LocalMI)
+ EmitStartPt = EmitStartPt->getPrevNode();
+ DEBUG(dbgs() << "removing dead local value materialization " << LocalMI);
+ OrderMap.Orders.erase(&LocalMI);
+ LocalMI.eraseFromParent();
+ return;
+ }
+
+ // Number the instructions if we haven't yet so we can efficiently find the
+ // earliest use.
+ if (OrderMap.Orders.empty())
+ OrderMap.initialize(FuncInfo.MBB);
+
+ // Find the first user in the BB.
+ MachineInstr *FirstUser = nullptr;
+ unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+ for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+ unsigned UseOrder = OrderMap.Orders[&UseInst];
+ if (UseOrder < FirstOrder) {
+ FirstOrder = UseOrder;
+ FirstUser = &UseInst;
+ }
+ }
+
+ // The insertion point will be the first terminator or the first user,
+ // whichever came first. If there was no terminator, this must be a
+ // fallthrough block and the insertion point is the end of the block.
+ MachineBasicBlock::instr_iterator SinkPos;
+ if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+ FirstOrder = OrderMap.FirstTerminatorOrder;
+ SinkPos = OrderMap.FirstTerminator->getIterator();
+ } else if (FirstUser) {
+ SinkPos = FirstUser->getIterator();
+ } else {
+ assert(UsedByPHI && "must be users if not used by a phi");
+ SinkPos = FuncInfo.MBB->instr_end();
+ }
+
+ // Collect all DBG_VALUEs before the new insertion position so that we can
+ // sink them.
+ SmallVector<MachineInstr *, 1> DbgValues;
+ for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+ if (!DbgVal.isDebugValue())
+ continue;
+ unsigned UseOrder = OrderMap.Orders[&DbgVal];
+ if (UseOrder < FirstOrder)
+ DbgValues.push_back(&DbgVal);
+ }
+
+ // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+ DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+ FuncInfo.MBB->remove(&LocalMI);
+ FuncInfo.MBB->insert(SinkPos, &LocalMI);
+ if (SinkPos != FuncInfo.MBB->end())
+ LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+ // Sink any debug values that we've collected.
+ for (MachineInstr *DI : DbgValues) {
+ FuncInfo.MBB->remove(DI);
+ FuncInfo.MBB->insert(SinkPos, DI);
+ }
+}
+
bool FastISel::hasTrivialKill(const Value *V) {
// Don't consider constants or arguments to have trivial kills.
const Instruction *I = dyn_cast<Instruction>(V);
@@ -328,8 +479,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
AssignedReg = Reg;
else if (Reg != AssignedReg) {
// Arrange for uses of AssignedReg to be replaced by uses of Reg.
- for (unsigned i = 0; i < NumRegs; i++)
+ for (unsigned i = 0; i < NumRegs; i++) {
FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+ FuncInfo.RegsWithFixups.insert(Reg + i);
+ }
AssignedReg = Reg;
}
OpenPOWER on IntegriCloud