1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
|
//===------ CodeGeneration.cpp - Code generate the Scops using ISL. ----======//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
// back to LLVM-IR using the ISL code generator.
//
// The Scop describes the high level memory behavior of a control flow region.
// Transformation passes can update the schedule (execution order) of statements
// in the Scop. ISL is used to generate an abstract syntax tree that reflects
// the updated execution order. This AST is used to create new LLVM-IR that is
// computationally equivalent to the original control flow region, but executes
// its code in the new execution order defined by the changed schedule.
//
//===----------------------------------------------------------------------===//
#include "polly/CodeGen/CodeGeneration.h"
#include "polly/CodeGen/IslAst.h"
#include "polly/CodeGen/IslNodeBuilder.h"
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/Utils.h"
#include "polly/DependenceInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopInfo.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
using namespace polly;
using namespace llvm;
#define DEBUG_TYPE "polly-codegen"
/// Command line switch -polly-codegen-verify.
///
/// Off by default and hidden. When set, verifyGeneratedFunction() runs the
/// IR verifier on every function CodeGen() produced code for.
static cl::opt<bool> Verify("polly-codegen-verify",
                            cl::desc("Verify the function generated by Polly"),
                            cl::init(false), cl::Hidden, cl::ZeroOrMore,
                            cl::cat(PollyCategory));
/// Command line switch -polly-codegen-perf-monitoring.
///
/// Off by default and hidden. When set, CodeGen() uses a PerfMonitor to insert
/// region start/end instrumentation around the code generated SCoP.
static cl::opt<bool>
    PerfMonitoring("polly-codegen-perf-monitoring",
                   cl::desc("Add run-time performance monitoring"),
                   cl::init(false), cl::Hidden, cl::ZeroOrMore,
                   cl::cat(PollyCategory));
namespace {
/// Run the IR verifier on @p F and abort if it reports a broken function.
///
/// This is a no-op unless -polly-codegen-verify was given. If the verifier
/// flags @p F, debug builds additionally dump the SCoP @p S, the isl AST held
/// by @p AI and the offending function before the unreachable is hit.
static void verifyGeneratedFunction(Scop &S, Function &F, IslAstInfo &AI) {
  // Verification is opt-in.
  if (!Verify)
    return;

  // A true result means the verifier found a problem; problems it finds are
  // written to errs().
  const bool IsBroken = verifyFunction(F, &errs());
  if (!IsBroken)
    return;

  DEBUG({
    raw_ostream &OS = errs();
    OS << "== ISL Codegen created an invalid function ==\n\n== The "
          "SCoP ==\n";
    OS << S;
    OS << "\n== The isl AST ==\n";
    AI.print(OS);
    OS << "\n== The invalid function ==\n";
    F.print(OS);
  });

  llvm_unreachable("Polly generated function could not be verified. Add "
                   "-polly-codegen-verify=false to disable this assertion.");
}
/// Assign every basic block of @p F that has no region yet to @p ParentRegion.
///
/// Code generation creates many basic blocks without registering them in the
/// RegionInfo. Instead of modelling any nested structure for them, all such
/// blocks are simply attached to the parent region of the SCoP, which is
/// enough to keep the RegionInfo verifier happy.
static void fixRegionInfo(Function &F, Region &ParentRegion, RegionInfo &RI) {
  for (BasicBlock &Block : F)
    if (!RI.getRegionFor(&Block))
      RI.setRegionFor(&Block, &ParentRegion);
}
/// Turn @p Block into a block that ends in an UnreachableInst.
///
/// The new UnreachableInst is created through @p Builder directly in front of
/// the block's current terminator, which is erased afterwards. As a side
/// effect the insert point of @p Builder is changed.
static void markBlockUnreachable(BasicBlock &Block, PollyIRBuilder &Builder) {
  auto *OldTerminator = Block.getTerminator();

  // Create the replacement first; the old terminator still serves as the
  // insertion anchor at this point.
  Builder.SetInsertPoint(OldTerminator);
  Builder.CreateUnreachable();

  OldTerminator->eraseFromParent();
}
/// Erase every llvm.lifetime.start and llvm.lifetime.end call from the blocks
/// of region @p R.
///
/// CodeGeneration does not copy lifetime markers into the optimized SCoP, so
/// they would remain only on the original code path. Code such as
///
///     llvm.lifetime.start(%p)
///     llvm.lifetime.end(%p)
///
/// could thereby end up as
///
///     if (RTC) {
///       // generated code
///     } else {
///       // original code
///       llvm.lifetime.start(%p)
///     }
///     llvm.lifetime.end(%p)
///
/// The current StackColoring algorithm cannot cope with an end marker for
/// which some, but not all, paths back to the entry block cross the start
/// marker, nor with a start marker that is not always followed by its end
/// marker. To stay clear of such issues all lifetime markers are dropped, even
/// those in the original code.
///
/// A better solution could be to hoist all llvm.lifetime.start calls to the
/// split node and all llvm.lifetime.end calls to the merge node, which should
/// be conservatively correct.
static void removeLifetimeMarkers(Region *R) {
  for (auto *BB : R->blocks()) {
    for (auto It = BB->begin(), End = BB->end(); It != End;) {
      // Step past the current instruction before it is possibly erased, so
      // the loop never continues from an erased position.
      auto Current = It++;

      auto *Intrinsic = dyn_cast<IntrinsicInst>(&*Current);
      if (!Intrinsic)
        continue;

      const auto ID = Intrinsic->getIntrinsicID();
      if (ID == llvm::Intrinsic::lifetime_start ||
          ID == llvm::Intrinsic::lifetime_end)
        BB->getInstList().erase(Current);
    }
  }
}
/// Generate LLVM-IR for the SCoP @p S from the isl AST provided by @p AI.
///
/// A conditional branch is introduced in front of the SCoP and the new code is
/// emitted next to the original code. If preloading the invariant loads
/// succeeds, the branch condition becomes the run-time check and the AST is
/// code generated. Otherwise the condition is forced to false and the blocks
/// of the new path are marked unreachable, so the original code always runs.
///
/// @param S  The SCoP to generate code for.
/// @param AI Provides the isl AST root and the run-time condition.
/// @param LI Passed to simplifyRegion, executeScopConditionally and the
///           IslNodeBuilder.
/// @param DT Dominator tree; passed on like @p LI and updated directly when
///           preloading fails.
/// @param SE Passed to the IslNodeBuilder.
/// @param RI Region info; passed to simplifyRegion, executeScopConditionally
///           and fixRegionInfo.
///
/// @return False if @p AI holds no AST root (nothing else has been called at
///         that point), true in all other cases, including the one where
///         preloading failed.
static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
ScalarEvolution &SE, RegionInfo &RI) {
// Check if we created an isl_ast root node, otherwise exit.
isl_ast_node *AstRoot = AI.getAst();
if (!AstRoot)
return false;
auto &DL = S.getFunction().getParent()->getDataLayout();
Region *R = &S.getRegion();
assert(!R->isTopLevelRegion() && "Top level regions are not supported");
ScopAnnotator Annotator;
// The region is expected to be simple after this call (asserted below).
simplifyRegion(R, &DT, &LI, &RI);
assert(R->isSimple());
BasicBlock *EnteringBB = S.getEnteringBlock();
assert(EnteringBB);
PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator);
// Only build the run-time condition and parameters _after_ having
// introduced the conditional branch. This is important as the conditional
// branch will guard the original scop from new induction variables that
// the SCEVExpander may introduce while code generating the parameters and
// which may introduce scalar dependences that prevent us from correctly
// code generating this scop.
//
// The branch is created with a constant 'true' condition here; that operand
// is replaced further down by either the run-time check or 'false'.
BBPair StartExitBlocks =
std::get<0>(executeScopConditionally(S, Builder.getTrue(), DT, RI, LI));
BasicBlock *StartBlock = std::get<0>(StartExitBlocks);
BasicBlock *ExitBlock = std::get<1>(StartExitBlocks);
removeLifetimeMarkers(R);
// The single predecessor of the start block; its terminator is the branch
// whose condition is patched below.
auto *SplitBlock = StartBlock->getSinglePredecessor();
IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
// All arrays must have their base pointers known before
// ScopAnnotator::buildAliasScopes.
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
if (PerfMonitoring) {
// Instrument the region: the start is recorded in the split block, the end
// in the unique successor of the exit block of the generated code.
PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
P.insertRegionStart(SplitBlock->getTerminator());
BasicBlock *MergeBlock = ExitBlock->getUniqueSuccessor();
P.insertRegionEnd(MergeBlock->getTerminator());
}
// First generate code for the hoisted invariant loads and transitively the
// parameters they reference. Afterwards, for the remaining parameters that
// might reference the hoisted loads. Finally, build the runtime check
// that might reference both hoisted loads as well as parameters.
// If the hoisting fails we have to bail and execute the original code.
Builder.SetInsertPoint(SplitBlock->getTerminator());
if (!NodeBuilder.preloadInvariantLoads()) {
// Patch the introduced branch condition to ensure that we always execute
// the original SCoP.
auto *FalseI1 = Builder.getFalse();
auto *SplitBBTerm = Builder.GetInsertBlock()->getTerminator();
SplitBBTerm->setOperand(0, FalseI1);
// Since the other branch is hence ignored we mark it as unreachable and
// adjust the dominator tree accordingly.
auto *ExitingBlock = StartBlock->getUniqueSuccessor();
assert(ExitingBlock);
auto *MergeBlock = ExitingBlock->getUniqueSuccessor();
assert(MergeBlock);
markBlockUnreachable(*StartBlock, Builder);
markBlockUnreachable(*ExitingBlock, Builder);
// Note: ExitingBB is the exiting block of the original SCoP, whereas
// ExitingBlock above is the successor of the start block of the new path.
auto *ExitingBB = S.getExitingBlock();
assert(ExitingBB);
DT.changeImmediateDominator(MergeBlock, ExitingBB);
DT.eraseNode(ExitingBlock);
// The AST is not passed to NodeBuilder.create() on this path, so it is freed
// here.
isl_ast_node_free(AstRoot);
} else {
NodeBuilder.addParameters(S.getContext());
Value *RTC = NodeBuilder.createRTC(AI.getRunCondition());
// Replace the placeholder branch condition with the run-time check.
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
// Explicitly set the insert point to the end of the block to avoid that a
// split at the builder's current
// insert position would move the malloc calls to the wrong BasicBlock.
// Ideally we would just split the block during allocation of the new
// arrays, but this would break the assumption that there are no blocks
// between polly.start and polly.exiting (at this point).
Builder.SetInsertPoint(StartBlock->getTerminator());
// NOTE(review): AstRoot is not freed on this path; presumably create() takes
// ownership of it -- confirm against IslNodeBuilder.
NodeBuilder.create(AstRoot);
NodeBuilder.finalize();
fixRegionInfo(*EnteringBB->getParent(), *R->getParent(), RI);
}
// Verify the function containing the SCoP and every parallel subfunction the
// node builder reports (both are no-ops unless -polly-codegen-verify is set).
Function *F = EnteringBB->getParent();
verifyGeneratedFunction(S, *F, AI);
for (auto *SubF : NodeBuilder.getParallelSubfunctions())
verifyGeneratedFunction(S, *SubF, AI);
// Mark the function such that we run additional cleanup passes on this
// function (e.g. mem2reg to rediscover phi nodes).
F->addFnAttr("polly-optimized");
return true;
}
/// ScopPass that generates LLVM-IR for a single SCoP.
///
/// runOnScop() collects the analyses it needs through getAnalysis<>() and
/// forwards them to CodeGen(). The collected pointers are kept in public
/// members; they are initialized to null so that they never hold an
/// indeterminate value before runOnScop() assigns them (for instance when a
/// SCoP is skipped).
class CodeGeneration : public ScopPass {
public:
  static char ID;

  CodeGeneration() : ScopPass(ID) {}

  /// The data layout used.
  ///
  /// Null until runOnScop() has processed a SCoP that is not skipped.
  const DataLayout *DL = nullptr;

  /// @name The analysis passes we need to generate code.
  ///
  /// Each pointer is null until runOnScop() has processed a SCoP that is not
  /// skipped.
  ///
  ///{
  LoopInfo *LI = nullptr;
  IslAstInfo *AI = nullptr;
  DominatorTree *DT = nullptr;
  ScalarEvolution *SE = nullptr;
  RegionInfo *RI = nullptr;
  ///}

  /// Generate LLVM-IR for the SCoP @p S.
  ///
  /// @return False if @p S is marked as to be skipped, otherwise the result of
  ///         CodeGen().
  bool runOnScop(Scop &S) override {
    // Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
    if (S.isToBeSkipped())
      return false;

    AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    DL = &S.getFunction().getParent()->getDataLayout();
    RI = &getAnalysis<RegionInfoPass>().getRegionInfo();
    return CodeGen(S, *AI, *LI, *DT, *SE, *RI);
  }

  /// Register all analyses and transformation required.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Analyses that must be available before this pass runs.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<IslAstInfoWrapperPass>();
    AU.addRequired<RegionInfoPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<ScopDetectionWrapperPass>();
    AU.addRequired<ScopInfoRegionPass>();
    AU.addRequired<LoopInfoWrapperPass>();

    // Analyses declared as still valid after this pass.
    AU.addPreserved<DependenceInfo>();

    AU.addPreserved<AAResultsWrapperPass>();
    AU.addPreserved<BasicAAWrapperPass>();
    AU.addPreserved<LoopInfoWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.addPreserved<IslAstInfoWrapperPass>();
    AU.addPreserved<ScopDetectionWrapperPass>();
    AU.addPreserved<ScalarEvolutionWrapperPass>();
    AU.addPreserved<SCEVAAWrapperPass>();

    // FIXME: We do not yet add regions for the newly generated code to the
    // region tree.
    AU.addPreserved<RegionInfoPass>();
    AU.addPreserved<ScopInfoRegionPass>();
  }
};
} // namespace
/// Generate code for the SCoP @p S using the analysis results in @p AR.
///
/// The isl AST is requested from @p SAM. If CodeGen() reports that it
/// generated code, no analysis is reported as preserved; otherwise all of
/// them are.
PreservedAnalyses
polly::CodeGenerationPass::run(Scop &S, ScopAnalysisManager &SAM,
                               ScopStandardAnalysisResults &AR, SPMUpdater &U) {
  auto &AstInfo = SAM.getResult<IslAstAnalysis>(S, AR);
  const bool Changed = CodeGen(S, AstInfo, AR.LI, AR.DT, AR.SE, AR.RI);
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
// Definition of the pass identifier that the CodeGeneration constructor hands
// to ScopPass.
char CodeGeneration::ID = 1;
// Factory for the CodeGeneration pass; the returned pass is allocated with
// new, so the caller receives ownership of it.
Pass *polly::createCodeGenerationPass() { return new CodeGeneration(); }
// Register the pass under the command line name "polly-codegen" together with
// the passes it declares a dependency on.
// NOTE(review): this list differs from CodeGeneration::getAnalysisUsage():
// DependenceInfo is listed here but only marked as preserved there, while the
// required IslAstInfoWrapperPass and ScopInfoRegionPass are not listed here --
// confirm whether that is intended.
INITIALIZE_PASS_BEGIN(CodeGeneration, "polly-codegen",
"Polly - Create LLVM-IR from SCoPs", false, false);
INITIALIZE_PASS_DEPENDENCY(DependenceInfo);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_END(CodeGeneration, "polly-codegen",
"Polly - Create LLVM-IR from SCoPs", false, false)
|