| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
 | //===-- Relooper.h - Top-level interface for WebAssembly  ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===-------------------------------------------------------------------===//
///
/// \file
/// \brief This defines an optimized C++ implemention of the Relooper
/// algorithm, originally developed as part of Emscripten, which
/// generates a structured AST from arbitrary control flow.
///
//===-------------------------------------------------------------------===//
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Casting.h"
#include <cassert>
#include <cstdarg>
#include <cstdio>
#include <deque>
#include <list>
#include <map>
#include <memory>
#include <set>
namespace llvm {
namespace Relooper {
struct Block;
struct Shape;
///
/// Info about a branching from one block to another
///
struct Branch {
  enum FlowType {
    Direct = 0, // We will directly reach the right location through other
                // means, no need for continue or break
    Break = 1,
    Continue = 2,
    Nested = 3 // This code is directly reached, but we must be careful to
               // ensure it is nested in an if - it is not reached
    // unconditionally, other code paths exist alongside it that we need to make
    // sure do not intertwine
  };
  Shape
      *Ancestor; // If not nullptr, this shape is the relevant one for purposes
                 // of getting to the target block. We break or continue on it
  Branch::FlowType
      Type;     // If Ancestor is not nullptr, this says whether to break or
                // continue
  bool Labeled; // If a break or continue, whether we need to use a label
  const char *Condition; // The condition for which we branch. For example,
                         // "my_var == 1". Conditions are checked one by one.
                         // One of the conditions should have nullptr as the
                         // condition, in which case it is the default
                         // FIXME: move from char* to LLVM data structures
  const char *Code; // If provided, code that is run right before the branch is
                    // taken. This is useful for phis
                    // FIXME: move from char* to LLVM data structures
  Branch(const char *ConditionInit, const char *CodeInit = nullptr);
  ~Branch();
};
typedef SetVector<Block *> BlockSet;
typedef MapVector<Block *, Branch *> BlockBranchMap;
typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap;
///
/// Represents a basic block of code - some instructions that end with a
/// control flow modifier (a branch, return or throw).
///
struct Block {
  // Branches become processed after we finish the shape relevant to them. For
  // example, when we recreate a loop, branches to the loop start become
  // continues and are now processed. When we calculate what shape to generate
  // from a set of blocks, we ignore processed branches. Blocks own the Branch
  // objects they use, and destroy them when done.
  OwningBlockBranchMap BranchesOut;
  BlockSet BranchesIn;
  OwningBlockBranchMap ProcessedBranchesOut;
  BlockSet ProcessedBranchesIn;
  Shape *Parent; // The shape we are directly inside
  int Id; // A unique identifier, defined when added to relooper. Note that this
          // uniquely identifies a *logical* block - if we split it, the two
          // instances have the same content *and* the same Id
  const char *Code;      // The string representation of the code in this block.
                         // Owning pointer (we copy the input)
                         // FIXME: move from char* to LLVM data structures
  const char *BranchVar; // A variable whose value determines where we go; if
                         // this is not nullptr, emit a switch on that variable
                         // FIXME: move from char* to LLVM data structures
  bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching
                               // us requires setting the label variable
  Block(const char *CodeInit, const char *BranchVarInit);
  ~Block();
  void AddBranchTo(Block *Target, const char *Condition,
                   const char *Code = nullptr);
};
///
/// Represents a structured control flow shape
///
struct Shape {
  int Id; // A unique identifier. Used to identify loops, labels are Lx where x
          // is the Id. Defined when added to relooper
  Shape *Next;    // The shape that will appear in the code right after this one
  Shape *Natural; // The shape that control flow gets to naturally (if there is
                  // Next, then this is Next)
  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
  enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop };
private:
  ShapeKind Kind;
public:
  ShapeKind getKind() const { return Kind; }
  Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {}
};
///
/// Simple: No control flow at all, just instructions.
///
struct SimpleShape : public Shape {
  Block *Inner;
  SimpleShape() : Shape(SK_Simple), Inner(nullptr) {}
  static bool classof(const Shape *S) { return S->getKind() == SK_Simple; }
};
///
/// A shape that may be implemented with a labeled loop.
///
struct LabeledShape : public Shape {
  bool Labeled; // If we have a loop, whether it needs to be labeled
  LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {}
};
// Blocks with the same id were split and are identical, so we just care about
// ids in Multiple entries
typedef std::map<int, Shape *> IdShapeMap;
///
/// Multiple: A shape with more than one entry. If the next block to
///           be entered is among them, we run it and continue to
///           the next shape, otherwise we continue immediately to the
///           next shape.
///
struct MultipleShape : public LabeledShape {
  IdShapeMap InnerMap; // entry block ID -> shape
  int Breaks; // If we have branches on us, we need a loop (or a switch). This
              // is a counter of requirements,
              // if we optimize it to 0, the loop is unneeded
  bool UseSwitch; // Whether to switch on label as opposed to an if-else chain
  MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {}
  static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; }
};
///
/// Loop: An infinite loop.
///
struct LoopShape : public LabeledShape {
  Shape *Inner;
  LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {}
  static bool classof(const Shape *S) { return S->getKind() == SK_Loop; }
};
} // namespace Relooper
} // namespace llvm
 |