summaryrefslogtreecommitdiffstats
path: root/llvm/include/llvm/Support/Automaton.h
blob: c2b921311a8ceb470f82fbbd9645438c5b99e769 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
//===-- Automaton.h - Support for driving TableGen-produced DFAs ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements class that drive and introspect deterministic finite-
// state automata (DFAs) as generated by TableGen's -gen-automata backend.
//
// For a description of how to define an automaton, see
// include/llvm/TableGen/Automaton.td.
//
// One important detail is that these deterministic automata are created from
// (potentially) nondeterministic definitions. Therefore a unique sequence of
// input symbols will produce one path through the DFA but multiple paths
// through the original NFA. An automaton by default only returns "accepted" or
// "not accepted", but frequently we want to analyze what NFA path was taken.
// Finding a path through the NFA states that results in a DFA state can help
// answer *what* the solution to a problem was, not just that there exists a
// solution.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_AUTOMATON_H
#define LLVM_SUPPORT_AUTOMATON_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
#include <deque>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>

namespace llvm {

using NfaPath = SmallVector<uint64_t, 4>;

/// Forward define the pair type used by the automata transition info tables.
///
/// Experimental results with large tables have shown a significant (multiple
/// orders of magnitude) parsing speedup by using a custom struct here with a
/// trivial constructor rather than std::pair<uint64_t, uint64_t>.
struct NfaStatePair {
  uint64_t FromDfaState, ToDfaState;

  bool operator<(const NfaStatePair &Other) const {
    return std::make_tuple(FromDfaState, ToDfaState) <
           std::make_tuple(Other.FromDfaState, Other.ToDfaState);
  }
};

namespace internal {
/// The internal class that maintains all possible paths through an NFA based
/// on a path through the DFA.
class NfaTranscriber {
private:
  /// Cached transition table. This is a table of NfaStatePairs that contains
  /// zero-terminated sequences pointed to by DFA transitions.
  ArrayRef<NfaStatePair> TransitionInfo;

  /// A simple linked-list of traversed states that can have a shared tail. The
  /// traversed path is stored in reverse order with the latest state as the
  /// head.
  struct PathSegment {
    uint64_t State;
    PathSegment *Tail;
  };

  /// We allocate segment objects frequently. Allocate them upfront and dispose
  /// at the end of a traversal rather than hammering the system allocator.
  SpecificBumpPtrAllocator<PathSegment> Allocator;

  /// Heads of each tracked path. These are not ordered.
  std::deque<PathSegment *> Heads;

  /// The returned paths. This is populated during getPaths.
  SmallVector<NfaPath, 4> Paths;

  /// Create a new segment and return it.
  PathSegment *makePathSegment(uint64_t State, PathSegment *Tail) {
    PathSegment *P = Allocator.Allocate();
    *P = {State, Tail};
    return P;
  }

  /// Pairs defines a sequence of possible NFA transitions for a single DFA
  /// transition.
  void transition(ArrayRef<NfaStatePair> Pairs) {
    // Iterate over all existing heads. We will mutate the Heads deque during
    // iteration.
    unsigned NumHeads = Heads.size();
    for (unsigned I = 0; I < NumHeads; ++I) {
      PathSegment *Head = Heads[I];
      // The sequence of pairs is sorted. Select the set of pairs that
      // transition from the current head state.
      auto PI = lower_bound(Pairs, NfaStatePair{Head->State, 0ULL});
      auto PE = upper_bound(Pairs, NfaStatePair{Head->State, INT64_MAX});
      // For every transition from the current head state, add a new path
      // segment.
      for (; PI != PE; ++PI)
        if (PI->FromDfaState == Head->State)
          Heads.push_back(makePathSegment(PI->ToDfaState, Head));
    }
    // Now we've iterated over all the initial heads and added new ones,
    // dispose of the original heads.
    Heads.erase(Heads.begin(), std::next(Heads.begin(), NumHeads));
  }

public:
  NfaTranscriber(ArrayRef<NfaStatePair> TransitionInfo)
      : TransitionInfo(TransitionInfo) {
    reset();
  }

  ArrayRef<NfaStatePair> getTransitionInfo() const {
    return TransitionInfo;
  }

  void reset() {
    Paths.clear();
    Heads.clear();
    Allocator.DestroyAll();
    // The initial NFA state is 0.
    Heads.push_back(makePathSegment(0ULL, nullptr));
  }

  void transition(unsigned TransitionInfoIdx) {
    unsigned EndIdx = TransitionInfoIdx;
    while (TransitionInfo[EndIdx].ToDfaState != 0)
      ++EndIdx;
    ArrayRef<NfaStatePair> Pairs(&TransitionInfo[TransitionInfoIdx],
                                 EndIdx - TransitionInfoIdx);
    transition(Pairs);
  }

  ArrayRef<NfaPath> getPaths() {
    Paths.clear();
    for (auto *Head : Heads) {
      NfaPath P;
      while (Head->State != 0) {
        P.push_back(Head->State);
        Head = Head->Tail;
      }
      std::reverse(P.begin(), P.end());
      Paths.push_back(std::move(P));
    }
    return Paths;
  }
};
} // namespace internal

/// A deterministic finite-state automaton. The automaton is defined in
/// TableGen; this object drives an automaton defined by tblgen-emitted tables.
///
/// An automaton accepts a sequence of input tokens ("actions"). This class is
/// templated on the type of these actions.
template <typename ActionT> class Automaton {
  /// Map from {State, Action} to {NewState, TransitionInfoIdx}.
  /// TransitionInfoIdx is used by the DfaTranscriber to analyze the transition.
  /// FIXME: This uses a std::map because ActionT can be a pair type including
  /// an enum. In particular DenseMapInfo<ActionT> must be defined to use
  /// DenseMap here.
  /// This is a shared_ptr to allow very quick copy-construction of Automata; this
  /// state is immutable after construction so this is safe.
  using MapTy = std::map<std::pair<uint64_t, ActionT>, std::pair<uint64_t, unsigned>>;
  std::shared_ptr<MapTy> M;
  /// An optional transcription object. This uses much more state than simply
  /// traversing the DFA for acceptance, so is heap allocated.
  std::shared_ptr<internal::NfaTranscriber> Transcriber;
  /// The initial DFA state is 1.
  uint64_t State = 1;
  /// True if we should transcribe and false if not (even if Transcriber is defined).
  bool Transcribe;

public:
  /// Create an automaton.
  /// \param Transitions The Transitions table as created by TableGen. Note that
  ///                    because the action type differs per automaton, the
  ///                    table type is templated as ArrayRef<InfoT>.
  /// \param TranscriptionTable The TransitionInfo table as created by TableGen.
  ///
  /// Providing the TranscriptionTable argument as non-empty will enable the
  /// use of transcription, which analyzes the possible paths in the original
  /// NFA taken by the DFA. NOTE: This is substantially more work than simply
  /// driving the DFA, so unless you require the getPaths() method leave this
  /// empty.
  template <typename InfoT>
  Automaton(ArrayRef<InfoT> Transitions,
            ArrayRef<NfaStatePair> TranscriptionTable = {}) {
    if (!TranscriptionTable.empty())
      Transcriber =
          std::make_shared<internal::NfaTranscriber>(TranscriptionTable);
    Transcribe = Transcriber != nullptr;
    M = std::make_shared<MapTy>();
    for (const auto &I : Transitions)
      // Greedily read and cache the transition table.
      M->emplace(std::make_pair(I.FromDfaState, I.Action),
                 std::make_pair(I.ToDfaState, I.InfoIdx));
  }
  Automaton(const Automaton &Other)
      : M(Other.M), State(Other.State), Transcribe(Other.Transcribe) {
    // Transcriber is not thread-safe, so create a new instance on copy.
    if (Other.Transcriber)
      Transcriber = std::make_shared<internal::NfaTranscriber>(
          Other.Transcriber->getTransitionInfo());
  }

  /// Reset the automaton to its initial state.
  void reset() {
    State = 1;
    if (Transcriber)
      Transcriber->reset();
  }

  /// Enable or disable transcription. Transcription is only available if
  /// TranscriptionTable was provided to the constructor.
  void enableTranscription(bool Enable = true) {
    assert(Transcriber &&
           "Transcription is only available if TranscriptionTable was provided "
           "to the Automaton constructor");
    Transcribe = Enable;
  }

  /// Transition the automaton based on input symbol A. Return true if the
  /// automaton transitioned to a valid state, false if the automaton
  /// transitioned to an invalid state.
  ///
  /// If this function returns false, all methods are undefined until reset() is
  /// called.
  bool add(const ActionT &A) {
    auto I = M->find({State, A});
    if (I == M->end())
      return false;
    if (Transcriber && Transcribe)
      Transcriber->transition(I->second.second);
    State = I->second.first;
    return true;
  }

  /// Return true if the automaton can be transitioned based on input symbol A.
  bool canAdd(const ActionT &A) {
    auto I = M->find({State, A});
    return I != M->end();
  }

  /// Obtain a set of possible paths through the input nondeterministic
  /// automaton that could be obtained from the sequence of input actions
  /// presented to this deterministic automaton.
  ArrayRef<NfaPath> getNfaPaths() {
    assert(Transcriber && Transcribe &&
           "Can only obtain NFA paths if transcribing!");
    return Transcriber->getPaths();
  }
};

} // namespace llvm

#endif // LLVM_SUPPORT_AUTOMATON_H
OpenPOWER on IntegriCloud