llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428

//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that transforms irreducible control flow into
/// reducible control flow. Irreducible control flow means multiple-entry
/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
/// due to being unnatural.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// The big picture: Ignoring natural loops (seeing them monolithically), we
/// find all the blocks which can return to themselves ("loopers"). Loopers
/// reachable from the non-loopers are loop entries: if there are 2 or more,
/// then we have irreducible control flow. We fix that as follows: a new block
/// is created that can dispatch to each of the loop entries, based on the
/// value of a label "helper" variable, and we replace direct branches to the
/// entries with assignments to the label variable and a branch to the dispatch
/// block. Then the dispatch block is the single entry in a new natural loop.
///
/// This is similar to what the Relooper [1] does, both identify looping code
/// that requires multiple entries, and resolve it in a similar way. In
/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
/// also prioritize code size and do not perform node splitting (i.e. we don't
/// duplicate code in order to resolve irreducibility).
///
/// The difference between this code and the Relooper is that the Relooper also
/// generates ifs and loops and works in a recursive manner, knowing at each
/// point what the entries are, and recursively breaks down the problem. Here
/// we just want to resolve irreducible control flow, and we also want to use
/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
/// identify natural loops, etc., and we start with the whole CFG and must
/// identify both the looping code and its entries.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
/// programming systems languages and applications companion (SPLASH '11). ACM,
/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"

namespace {

class LoopFixer {
public:
  LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
      : MF(MF), MLI(MLI), Loop(Loop) {}

  // Run the fixer on the given inputs. Returns whether changes were made.
  bool run();

private:
  MachineFunction &MF;
  MachineLoopInfo &MLI;
  MachineLoop *Loop;

  MachineBasicBlock *Header;
  SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;

  using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
  DenseMap<MachineBasicBlock *, BlockSet> Reachable;

  // The worklist contains pairs of recent additions, (a, b), where we just
  // added a link a => b.
  using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
  SmallVector<BlockPair, 4> WorkList;

  // Get a canonical block to represent a block or a loop: the block, or if in
  // an inner loop, the loop header, of it in an outer loop scope, we can
  // ignore it. We need to call this on all blocks we work on.
  MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
    if (InnerLoop == Loop) {
      return MBB;
    } else {
      // This is either in an outer or an inner loop, and not in ours.
      if (!LoopBlocks.count(MBB)) {
        // It's in outer code, ignore it.
        return nullptr;
      }
      assert(InnerLoop);
      // It's in an inner loop, canonicalize it to the header of that loop.
      return InnerLoop->getHeader();
    }
  }

  // For a successor we can additionally ignore it if it's a branch back to a
  // natural loop top, as when we are in the scope of a loop, we just care
  // about internal irreducibility, and can ignore the loop we are in. We need
  // to call this on all blocks in a context where they are a successor.
  MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
    if (Loop && MBB == Loop->getHeader()) {
      // Ignore branches going to the loop's natural header.
      return nullptr;
    }
    return canonicalize(MBB);
  }

  // Potentially insert a new reachable edge, and if so, note it as further
  // work.
  void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
    assert(MBB == canonicalize(MBB));
    assert(Succ);
    // Succ may not be interesting as a sucessor.
    Succ = canonicalizeSuccessor(Succ);
    if (!Succ)
      return;
    if (Reachable[MBB].insert(Succ).second) {
      // For there to be further work, it means that we have
      //   X => MBB => Succ
      // for some other X, and in that case X => Succ would be a new edge for
      // us to discover later. However, if we don't care about MBB as a
      // successor, then we don't care about that anyhow.
      if (canonicalizeSuccessor(MBB)) {
        WorkList.emplace_back(MBB, Succ);
      }
    }
  }
};

bool LoopFixer::run() {
  Header = Loop ? Loop->getHeader() : &*MF.begin();

  // Identify all the blocks in this loop scope.
  if (Loop) {
    for (auto *MBB : Loop->getBlocks()) {
      LoopBlocks.insert(MBB);
    }
  } else {
    for (auto &MBB : MF) {
      LoopBlocks.insert(&MBB);
    }
  }

  // Compute which (canonicalized) blocks each block can reach.

  // Add all the initial work.
  for (auto *MBB : LoopBlocks) {
    MachineLoop *InnerLoop = MLI.getLoopFor(MBB);

    if (InnerLoop == Loop) {
      for (auto *Succ : MBB->successors()) {
        maybeInsert(MBB, Succ);
      }
    } else {
      // It can't be in an outer loop - we loop on LoopBlocks - and so it must
      // be an inner loop.
      assert(InnerLoop);
      // Check if we are the canonical block for this loop.
      if (canonicalize(MBB) != MBB) {
        continue;
      }
      // The successors are those of the loop.
      SmallVector<MachineBasicBlock *, 2> ExitBlocks;
      InnerLoop->getExitBlocks(ExitBlocks);
      for (auto *Succ : ExitBlocks) {
        maybeInsert(MBB, Succ);
      }
    }
  }

  // Do work until we are all done.
  while (!WorkList.empty()) {
    MachineBasicBlock *MBB;
    MachineBasicBlock *Succ;
    std::tie(MBB, Succ) = WorkList.pop_back_val();
    // The worklist item is an edge we just added, so it must have valid blocks
    // (and not something canonicalized to nullptr).
    assert(MBB);
    assert(Succ);
    // The successor in that pair must also be a valid successor.
    assert(MBB == canonicalizeSuccessor(MBB));
    // We recently added MBB => Succ, and that means we may have enabled
    // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
    // is correct for both a block and a block representing a loop, as the loop
    // is natural and so the predecessors are all predecessors of the loop
    // header, which is the block we have here.
    for (auto *Pred : MBB->predecessors()) {
      // Canonicalize, make sure it's relevant, and check it's not the same
      // block (an update to the block itself doesn't help compute that same
      // block).
      Pred = canonicalize(Pred);
      if (Pred && Pred != MBB) {
        maybeInsert(Pred, Succ);
      }
    }
  }

  // It's now trivial to identify the loopers.
  SmallPtrSet<MachineBasicBlock *, 4> Loopers;
  for (auto MBB : LoopBlocks) {
    if (Reachable[MBB].count(MBB)) {
      Loopers.insert(MBB);
    }
  }
  // The header cannot be a looper. At the toplevel, LLVM does not allow the
  // entry to be in a loop, and in a natural loop we should ignore the header.
  assert(Loopers.count(Header) == 0);

  // Find the entries, loopers reachable from non-loopers.
  SmallPtrSet<MachineBasicBlock *, 4> Entries;
  SmallVector<MachineBasicBlock *, 4> SortedEntries;
  for (auto *Looper : Loopers) {
    for (auto *Pred : Looper->predecessors()) {
      Pred = canonicalize(Pred);
      if (Pred && !Loopers.count(Pred)) {
        Entries.insert(Looper);
        SortedEntries.push_back(Looper);
        break;
      }
    }
  }

  // Check if we found irreducible control flow.
  if (LLVM_LIKELY(Entries.size() <= 1))
    return false;

  // Sort the entries to ensure a deterministic build.
  llvm::sort(SortedEntries,
             [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
               auto ANum = A->getNumber();
               auto BNum = B->getNumber();
               return ANum < BNum;
             });

#ifndef NDEBUG
  for (auto Block : SortedEntries)
    assert(Block->getNumber() != -1);
  if (SortedEntries.size() > 1) {
    for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
         I != E; ++I) {
      auto ANum = (*I)->getNumber();
      auto BNum = (*(std::next(I)))->getNumber();
      assert(ANum != BNum);
    }
  }
#endif

  // Create a dispatch block which will contain a jump table to the entries.
  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
  MF.insert(MF.end(), Dispatch);
  MLI.changeLoopFor(Dispatch, Loop);

  // Add the jump table.
  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
                                    TII.get(WebAssembly::BR_TABLE_I32));

  // Add the register which will be used to tell the jump table which block to
  // jump to.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
  MIB.addReg(Reg);

  // Compute the indices in the superheader, one for each bad block, and
  // add them as successors.
  DenseMap<MachineBasicBlock *, unsigned> Indices;
  for (auto *MBB : SortedEntries) {
    auto Pair = Indices.insert(std::make_pair(MBB, 0));
    if (!Pair.second) {
      continue;
    }

    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
    Pair.first->second = Index;

    MIB.addMBB(MBB);
    Dispatch->addSuccessor(MBB);
  }

  // Rewrite the problematic successors for every block that wants to reach the
  // bad blocks. For simplicity, we just introduce a new block for every edge
  // we need to rewrite. (Fancier things are possible.)

  SmallVector<MachineBasicBlock *, 4> AllPreds;
  for (auto *MBB : SortedEntries) {
    for (auto *Pred : MBB->predecessors()) {
      if (Pred != Dispatch) {
        AllPreds.push_back(Pred);
      }
    }
  }

  for (MachineBasicBlock *MBB : AllPreds) {
    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
    for (auto *Succ : MBB->successors()) {
      if (!Entries.count(Succ)) {
        continue;
      }

      // This is a successor we need to rewrite.
      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
      MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
                                             : MF.end(),
                Split);
      MLI.changeLoopFor(Split, Loop);

      // Set the jump table's register of the index of the block we wish to
      // jump to, and jump to the jump table.
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
              Reg)
          .addImm(Indices[Succ]);
      BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
          .addMBB(Dispatch);
      Split->addSuccessor(Dispatch);
      Map[Succ] = Split;
    }
    // Remap the terminator operands and the successor list.
    for (MachineInstr &Term : MBB->terminators())
      for (auto &Op : Term.explicit_uses())
        if (Op.isMBB() && Indices.count(Op.getMBB()))
          Op.setMBB(Map[Op.getMBB()]);
    for (auto Rewrite : Map)
      MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
  }

  // Create a fake default label, because br_table requires one.
  MIB.addMBB(MIB.getInstr()
                 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
                 .getMBB());

  return true;
}

class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
  StringRef getPassName() const override {
    return "WebAssembly Fix Irreducible Control Flow";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    AU.addPreserved<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
    // Visit the function body, which is identified as a null loop.
    if (LoopFixer(MF, MLI, nullptr).run()) {
      return true;
    }

    // Visit all the loops.
    SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
    while (!Worklist.empty()) {
      MachineLoop *Loop = Worklist.pop_back_val();
      Worklist.append(Loop->begin(), Loop->end());
      if (LoopFixer(MF, MLI, Loop).run()) {
        return true;
      }
    }

    return false;
  }

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace

char WebAssemblyFixIrreducibleControlFlow::ID = 0;
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)

FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
  return new WebAssemblyFixIrreducibleControlFlow();
}

bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
                       "********** Function: "
                    << MF.getName() << '\n');

  bool Changed = false;
  auto &MLI = getAnalysis<MachineLoopInfo>();

  // When we modify something, bail out and recompute MLI, then start again, as
  // we create a new natural loop when we resolve irreducible control flow, and
  // other loops may become nested in it, etc. In practice this is not an issue
  // because irreducible control flow is rare, only very few cycles are needed
  // here.
  while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
    // We rewrote part of the function; recompute MLI and start again.
    LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
    MF.getRegInfo().invalidateLiveness();
    MF.RenumberBlocks();
    getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
    MLI.runOnMachineFunction(MF);
    Changed = true;
  }

  return Changed;
}