summaryrefslogtreecommitdiffstats
path: root/polly/lib/IndependentBlocks.cpp
blob: f97a334e9d5c931c66c8cef3da2e044d1ef63c4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create independent blocks in the regions detected by ScopDetection.
//
//===----------------------------------------------------------------------===//
//
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/CodeGen/BlockGenerators.h"
#include "polly/CodeGen/Cloog.h"
#include "polly/ScopDetection.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CommandLine.h"
#define DEBUG_TYPE "polly-independent"
#include "llvm/Support/Debug.h"

#include <vector>

using namespace polly;
using namespace llvm;

// Hidden command-line switch (off by default) registered in PollyCategory.
// When it is set, translateScalarToArray() only rewrites values that are used
// outside of the region, and isIndependentBlock() no longer checks the
// operands of an instruction for definitions in other blocks of the region.
static cl::opt<bool> DisableIntraScopScalarToArray(
    "disable-polly-intra-scop-scalar-to-array",
    cl::desc("Do not rewrite scalar to array to generate independent blocks"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

namespace {
// Function pass that processes every region reported by ScopDetection: it
// copies movable operand trees into the using block, deletes trivially dead
// instructions, splits the region exit and finally routes the remaining
// cross-block scalar values through stack slots (alloca/store/load).
struct IndependentBlocks : public FunctionPass {
  // Analysis results; assigned from getAnalysis<>() in runOnFunction().
  RegionInfo *RI;
  ScalarEvolution *SE;
  ScopDetection *SD;
  LoopInfo *LI;

  // Entry block of the function currently being processed. The allocas
  // created by translateScalarToArray() are inserted at its beginning.
  BasicBlock *AllocaBlock;

  // Pass identification.
  static char ID;

  IndependentBlocks() : FunctionPass(ID) {}

  // Create new code for every instruction operand that can be expressed by a
  // SCEV. After that, only two kinds of instructions are left:
  //
  // 1. Instructions that only reference loop induction variables or
  //    parameters defined outside the region.
  //
  // 2. Instructions that are not used for any memory modification. (These
  //    will be ignored later on.)
  //
  // Blocks that contain only these kinds of instructions are called
  // independent blocks, as they can be scheduled arbitrarily.
  //
  // Both overloads return true if any operand was replaced.
  bool createIndependentBlocks(BasicBlock *BB, const Region *R);
  bool createIndependentBlocks(const Region *R);

  // Delete the trivially dead instructions of the region, so that the scalar
  // dependences that come with them are eliminated as well.
  // Returns true if at least one instruction was found and deleted.
  bool eliminateDeadCode(const Region *R);

  //===--------------------------------------------------------------------===//
  /// Non-trivial scalar dependence checking functions.
  /// Non-trivial scalar dependences occur when the def and the use are located
  /// in different BBs and we cannot move them into the same one. This
  /// prevents us from scheduling BBs arbitrarily.
  ///
  /// @brief This function checks if a scalar value that is part of the
  ///        Scop is used outside of the Scop.
  ///
  /// @param Use  The user of the instruction.
  /// @param R    The maximum region in the Scop.
  ///
  /// @return Return true if the Use of an instruction and the instruction
  ///         itself form a non-trivial scalar dependence, i.e. if Use is an
  ///         instruction that is not contained in R.
  static bool isEscapeUse(const Value *Use, const Region *R);

  /// @brief This function just checks if a Value is either defined in the same
  ///        basic block or outside the region, such that there are no scalar
  ///        dependences between basic blocks that are both part of the same
  ///        region.
  ///
  /// @param Operand  The operand of the instruction.
  /// @param CurBB    The BasicBlock that contains the instruction.
  /// @param R        The maximum region in the Scop.
  ///
  /// @return Return true if the Operand of an instruction and the instruction
  ///         itself form a non-trivial scalar (true) dependence.
  bool isEscapeOperand(const Value *Operand, const BasicBlock *CurBB,
                       const Region *R) const;

  //===--------------------------------------------------------------------===//
  /// Operand tree moving functions.
  /// Trivial scalar dependences can be eliminated by moving the def into the
  /// same BB that contains the use.
  ///
  /// @brief Check if the instruction can be moved to another place safely.
  ///
  /// @param Inst The instruction.
  ///
  /// @return Return true if the instruction can be moved safely, false
  ///         otherwise.
  static bool isSafeToMove(Instruction *Inst);

  // Maps an original instruction to the copy that replaces it in the block
  // currently being processed.
  typedef std::map<Instruction *, Instruction *> ReplacedMapType;

  /// @brief Move all safe-to-move instructions in the operand tree (DAG) to
  ///        eliminate trivial scalar dependences.
  ///
  /// @param Inst         The root of the operand tree.
  /// @param R            The maximum region in the Scop.
  /// @param ReplacedMap  The map from an original instruction to the moved
  ///                     instruction.
  /// @param InsertPos    The insert position of the moved instructions.
  void moveOperandTree(Instruction *Inst, const Region *R,
                       ReplacedMapType &ReplacedMap, Instruction *InsertPos);

  // Check a single block / all blocks of a region for remaining non-trivial
  // scalar dependences. Return true if none is found.
  bool isIndependentBlock(const Region *R, BasicBlock *BB) const;
  bool areAllBlocksIndependent(const Region *R) const;

  // Split the exit block to hold load instructions.
  bool splitExitBlock(Region *R);
  // Return true if no instruction outside of R uses Inst.
  bool onlyUsedInRegion(Instruction *Inst, const Region *R);
  // Replace scalar values by accesses to a stack slot, for one block, one
  // instruction or a whole region. Return true if anything was rewritten.
  bool translateScalarToArray(BasicBlock *BB, const Region *R);
  bool translateScalarToArray(Instruction *Inst, const Region *R);
  bool translateScalarToArray(const Region *R);

  // FunctionPass interface.
  bool runOnFunction(Function &F);
  void verifyAnalysis() const;
  // Assert that all blocks of R are independent.
  void verifyScop(const Region *R) const;
  void getAnalysisUsage(AnalysisUsage &AU) const;
};
}

// An instruction may be relocated only if it neither reads nor writes memory
// and is additionally safe to execute speculatively.
bool IndependentBlocks::isSafeToMove(Instruction *Inst) {
  const bool TouchesMemory =
      Inst->mayReadFromMemory() || Inst->mayWriteToMemory();

  return !TouchesMemory && isSafeToSpeculativelyExecute(Inst);
}

// Walk the operand tree rooted at Inst and replace every movable operand that
// is defined in another block of region R by a clone. Clones are recorded in
// ReplacedMap (original -> clone) and inserted in front of InsertPos.
// ScalarEvolution is told to forget the values that are rewritten.
void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R,
                                        ReplacedMapType &ReplacedMap,
                                        Instruction *InsertPos) {
  BasicBlock *CurBB = Inst->getParent();

  // Depth-first traversal of the operand tree (or operand DAG; because we
  // stop at PHINodes, there are no cycles). Each stack entry is an
  // instruction together with the next operand to look at.
  typedef Instruction::op_iterator ChildIt;
  std::vector<std::pair<Instruction *, ChildIt> > WorkStack;

  WorkStack.push_back(std::make_pair(Inst, Inst->op_begin()));
  DenseSet<Instruction *> VisitedSet;

  while (!WorkStack.empty()) {
    Instruction *CurInst = WorkStack.back().first;
    // Copy of the operand iterator: it keeps pointing at the current operand
    // after the stack entry has been advanced below.
    ChildIt It = WorkStack.back().second;
    DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n");
    if (It == CurInst->op_end()) {
      // All operands are processed. A clone has no parent yet; inserting it
      // only now places the new instructions in topological order.
      if (!CurInst->getParent()) {
        CurInst->insertBefore(InsertPos);
        SE->forgetValue(CurInst);
      }

      WorkStack.pop_back();
    } else {
      // for each node N,
      Instruction *Operand = dyn_cast<Instruction>(*It);
      ++WorkStack.back().second;

      // Values that are not instructions cannot be moved.
      if (Operand == 0)
        continue;

      DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->");

      // If the Scop Region does not contain N, skip it and all its operands and
      // continue: because we reach a "parameter".
      // FIXME: we must keep the predicate instruction inside the Scop,
      // otherwise it will be translated to a load instruction, and we can not
      // handle load as affine predicate at this moment.
      if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) {
        DEBUG(dbgs() << "Out of region.\n");
        continue;
      }

      // Operands that canSynthesize() accepts are left untouched.
      if (canSynthesize(Operand, LI, SE, R)) {
        DEBUG(dbgs() << "is IV.\n");
        continue;
      }

      // We cannot move the operand: a non-trivial scalar dependence was found!
      if (!isSafeToMove(Operand)) {
        DEBUG(dbgs() << "Can not move!\n");
        continue;
      }

      // There is no need to move the instruction if it is contained in the
      // same BB as the root instruction.
      if (Operand->getParent() == CurBB) {
        DEBUG(dbgs() << "No need to move.\n");
        // Try to move its operands, but do not visit an instruction twice.
        if (VisitedSet.insert(Operand).second)
          WorkStack.push_back(std::make_pair(Operand, Operand->op_begin()));
        continue;
      }

      // Now we need to move Operand to CurBB.
      // Check if we already moved it.
      ReplacedMapType::iterator At = ReplacedMap.find(Operand);
      if (At != ReplacedMap.end()) {
        DEBUG(dbgs() << "Moved.\n");
        // Reuse the existing clone instead of creating a second one.
        Instruction *MovedOp = At->second;
        It->set(MovedOp);
        SE->forgetValue(MovedOp);
      } else {
        // Note that NewOp is not inserted in any BB now; we will insert it
        // when it is popped from the work stack, so it will be inserted in
        // topological order.
        Instruction *NewOp = Operand->clone();
        NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName());
        DEBUG(dbgs() << "Move to " << *NewOp << "\n");
        It->set(NewOp);
        ReplacedMap.insert(std::make_pair(Operand, NewOp));
        SE->forgetValue(Operand);

        // Process its operands, but do not visit an instruction twice.
        if (VisitedSet.insert(NewOp).second)
          WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin()));
      }
    }
  }

  SE->forgetValue(Inst);
}

// Move the operand trees of all instructions in BB that are neither safe to
// move nor synthesizable. Returns true if at least one operand was replaced.
bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB,
                                                const Region *R) {
  // Collect the roots up front; moveOperandTree() inserts instructions into
  // BB, so the block is not modified while it is being scanned.
  std::vector<Instruction *> Roots;
  for (BasicBlock::iterator Cur = BB->begin(), End = BB->end(); Cur != End;
       ++Cur) {
    if (isSafeToMove(Cur) || canSynthesize(Cur, LI, SE, R))
      continue;

    Roots.push_back(Cur);
  }

  ReplacedMapType Moved;
  Instruction *InsertPt = BB->getFirstNonPHIOrDbg();

  for (unsigned Idx = 0, Num = Roots.size(); Idx != Num; ++Idx)
    moveOperandTree(Roots[Idx], R, Moved, InsertPt);

  // An entry in the map means that some operand has been rewritten.
  const bool NothingMoved = Moved.empty();
  return !NothingMoved;
}

bool IndependentBlocks::createIndependentBlocks(const Region *R) {
  bool Changed = false;

  for (Region::const_block_iterator SI = R->block_begin(), SE = R->block_end();
       SI != SE; ++SI)
    Changed |= createIndependentBlocks(*SI, R);

  return Changed;
}

bool IndependentBlocks::eliminateDeadCode(const Region *R) {
  std::vector<Instruction *> WorkList;

  // Find all trivially dead instructions.
  for (Region::const_block_iterator SI = R->block_begin(), SE = R->block_end();
       SI != SE; ++SI)
    for (BasicBlock::iterator I = (*SI)->begin(), E = (*SI)->end(); I != E; ++I)
      if (isInstructionTriviallyDead(I))
        WorkList.push_back(I);

  if (WorkList.empty())
    return false;

  // Delete them so the cross BB scalar dependences come with them will
  // also be eliminated.
  while (!WorkList.empty()) {
    RecursivelyDeleteTriviallyDeadInstructions(WorkList.back());
    WorkList.pop_back();
  }

  return true;
}

bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) {
  // Non-instruction user will never escape.
  if (!isa<Instruction>(Use))
    return false;

  return !R->contains(cast<Instruction>(Use));
}

bool IndependentBlocks::isEscapeOperand(const Value *Operand,
                                        const BasicBlock *CurBB,
                                        const Region *R) const {
  const Instruction *OpInst = dyn_cast<Instruction>(Operand);

  // Non-instruction operand will never escape.
  if (OpInst == 0)
    return false;

  // Induction variables are valid operands.
  if (canSynthesize(OpInst, LI, SE, R))
    return false;

  // A value from a different BB is used in the same region.
  return R->contains(OpInst) && (OpInst->getParent() != CurBB);
}

// Give region R a new exit block (used to place the loads of escaped users)
// and update the region tree accordingly. Returns false, without changing
// anything, if the current exit block is not the entry of the region that
// RegionInfo associates with it.
bool IndependentBlocks::splitExitBlock(Region *R) {
  BasicBlock *OldExit = R->getExit();

  if (RI->getRegionFor(OldExit)->getEntry() != OldExit)
    return false;

  BasicBlock *FreshExit = createSingleExitEdge(R, this);

  // Starting at R, visit every nested region that shares the old exit block
  // and redirect it to the new one.
  std::vector<Region *> Pending(1, R);
  do {
    Region *Cur = Pending.back();
    Pending.pop_back();

    // Queue the children first; they are matched against the old exit.
    for (Region::iterator ChildIt = Cur->begin(), ChildEnd = Cur->end();
         ChildIt != ChildEnd; ++ChildIt)
      if ((*ChildIt)->getExit() == OldExit)
        Pending.push_back(*ChildIt);

    Cur->replaceExit(FreshExit);
  } while (!Pending.empty());

  // The new exit block is assigned to the parent region of R.
  RI->setRegionFor(FreshExit, R->getParent());
  return true;
}

bool IndependentBlocks::translateScalarToArray(const Region *R) {
  bool Changed = false;

  for (Region::const_block_iterator SI = R->block_begin(), SE = R->block_end();
       SI != SE; ++SI)
    Changed |= translateScalarToArray(*SI, R);

  return Changed;
}

// Returns true when none of the instructions using Inst lies outside of
// region R.
bool IndependentBlocks::onlyUsedInRegion(Instruction *Inst, const Region *R) {
  Instruction::use_iterator UseIt = Inst->use_begin();
  Instruction::use_iterator UseEnd = Inst->use_end();

  for (; UseIt != UseEnd; ++UseIt) {
    Instruction *UserInst = dyn_cast<Instruction>(*UseIt);
    if (UserInst && isEscapeUse(UserInst, R))
      return false;
  }

  return true;
}

// Route the value of Inst through a stack slot if it is used outside of
// region R, or (unless DisableIntraScopScalarToArray is set) by a
// non-synthesizable instruction in another block of R.
//
// Returns false, leaving the IR untouched, if Inst can be synthesized and is
// only used inside R, or if no user requires a load. Otherwise an alloca, a
// store of Inst and the required loads are created and true is returned.
bool IndependentBlocks::translateScalarToArray(Instruction *Inst,
                                               const Region *R) {
  if (canSynthesize(Inst, LI, SE, R) && onlyUsedInRegion(Inst, R))
    return false;

  // Users that need a load inside the region / outside the region.
  SmallVector<Instruction *, 4> LoadInside, LoadOutside;
  for (Instruction::use_iterator UI = Inst->use_begin(), UE = Inst->use_end();
       UI != UE; ++UI)
    // Inst is referenced outside or referenced as an escaped operand.
    if (Instruction *U = dyn_cast<Instruction>(*UI)) {
      if (isEscapeUse(U, R))
        LoadOutside.push_back(U);

      // The remaining checks only concern users inside the region.
      if (DisableIntraScopScalarToArray)
        continue;

      if (canSynthesize(U, LI, SE, R))
        continue;

      BasicBlock *UParent = U->getParent();
      if (R->contains(UParent) && isEscapeOperand(Inst, UParent, R))
        LoadInside.push_back(U);
    }

  if (LoadOutside.empty() && LoadInside.empty())
    return false;

  // Create the alloca at the beginning of AllocaBlock.
  AllocaInst *Slot = new AllocaInst(
      Inst->getType(), 0, Inst->getName() + ".s2a", AllocaBlock->begin());
  assert(!isa<InvokeInst>(Inst) && "Unexpect Invoke in Scop!");

  // Store right after Inst, and make sure the position is after all phi nodes.
  BasicBlock::iterator StorePos;
  if (isa<PHINode>(Inst)) {
    StorePos = Inst->getParent()->getFirstNonPHI();
  } else {
    StorePos = Inst;
    StorePos++;
  }
  (void)new StoreInst(Inst, Slot, StorePos);

  // All users outside of the region share a single load, which is placed at
  // the first non-PHI position of the region's exit block.
  if (!LoadOutside.empty()) {
    LoadInst *ExitLoad = new LoadInst(Slot, Inst->getName() + ".loadoutside",
                                      false, R->getExit()->getFirstNonPHI());

    while (!LoadOutside.empty()) {
      Instruction *U = LoadOutside.pop_back_val();
      assert(!isa<PHINode>(U) && "Can not handle PHI node outside!");
      SE->forgetValue(U);
      U->replaceUsesOfWith(Inst, ExitLoad);
    }
  }

  // Each user inside the region gets its own load, inserted directly in front
  // of the user.
  while (!LoadInside.empty()) {
    Instruction *U = LoadInside.pop_back_val();
    assert(!isa<PHINode>(U) && "Can not handle PHI node inside!");
    SE->forgetValue(U);
    LoadInst *L = new LoadInst(Slot, Inst->getName() + ".loadarray", false, U);
    U->replaceUsesOfWith(Inst, L);
  }

  return true;
}

// Apply the scalar-to-array translation to every instruction of BB except the
// last one. Returns true if any instruction was rewritten.
bool IndependentBlocks::translateScalarToArray(BasicBlock *BB,
                                               const Region *R) {
  // The final instruction of the block is deliberately left out.
  BasicBlock::iterator Last = BB->end();
  --Last;

  // Take a snapshot first: the translation inserts new instructions into the
  // function, which must not be visited themselves.
  SmallVector<Instruction *, 32> Snapshot;
  for (BasicBlock::iterator Cur = BB->begin(); Cur != Last; ++Cur)
    Snapshot.push_back(Cur);

  // Walk the snapshot from its end towards its beginning.
  bool Modified = false;
  for (unsigned Idx = Snapshot.size(); Idx != 0; --Idx)
    if (translateScalarToArray(Snapshot[Idx - 1], R))
      Modified = true;

  return Modified;
}

bool IndependentBlocks::isIndependentBlock(const Region *R,
                                           BasicBlock *BB) const {
  for (BasicBlock::iterator II = BB->begin(), IE = --BB->end(); II != IE;
       ++II) {
    Instruction *Inst = &*II;

    if (canSynthesize(Inst, LI, SE, R))
      continue;

    // A value inside the Scop is referenced outside.
    for (Instruction::use_iterator UI = Inst->use_begin(), UE = Inst->use_end();
         UI != UE; ++UI) {
      if (isEscapeUse(*UI, R)) {
        DEBUG(dbgs() << "Instruction not independent:\n");
        DEBUG(dbgs() << "Instruction used outside the Scop!\n");
        DEBUG(Inst->print(dbgs()));
        DEBUG(dbgs() << "\n");
        return false;
      }
    }

    if (DisableIntraScopScalarToArray)
      continue;

    for (Instruction::op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
         OI != OE; ++OI) {
      if (isEscapeOperand(*OI, BB, R)) {
        DEBUG(dbgs() << "Instruction in function '";
              WriteAsOperand(dbgs(), BB->getParent(), false);
              dbgs() << "' not independent:\n");
        DEBUG(dbgs() << "Uses invalid operator\n");
        DEBUG(Inst->print(dbgs()));
        DEBUG(dbgs() << "\n");
        DEBUG(dbgs() << "Invalid operator is: ";
              WriteAsOperand(dbgs(), *OI, false); dbgs() << "\n");
        return false;
      }
    }
  }

  return true;
}

bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const {
  for (Region::const_block_iterator SI = R->block_begin(), SE = R->block_end();
       SI != SE; ++SI)
    if (!isIndependentBlock(R, *SI))
      return false;

  return true;
}

// Declare the analyses this pass needs and the ones it keeps valid.
void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const {
  // Analyses that are queried in runOnFunction().
  AU.addRequired<RegionInfo>();
  AU.addRequired<LoopInfo>();
  AU.addRequired<ScalarEvolution>();
  AU.addRequired<ScopDetection>();

  // Analyses that are preserved.
  // FIXME: If we set preserves cfg, the cfg only passes do not need to
  // be "addPreserved"?
  AU.addPreserved<DominatorTree>();
  AU.addPreserved<DominanceFrontier>();
  AU.addPreserved<PostDominatorTree>();
  AU.addPreserved<RegionInfo>();
  AU.addPreserved<LoopInfo>();
  AU.addPreserved<ScalarEvolution>();
  AU.addPreserved<ScopDetection>();
#ifdef CLOOG_FOUND
  AU.addPreserved<CloogInfo>();
#endif
}

// Pass entry point. Phase one moves operand trees, removes dead code and
// splits the exit block of every detected region; phase two translates the
// remaining scalar dependences into stack slot accesses. Returns true if the
// function was modified.
bool IndependentBlocks::runOnFunction(llvm::Function &F) {
  RI = &getAnalysis<RegionInfo>();
  LI = &getAnalysis<LoopInfo>();
  SD = &getAnalysis<ScopDetection>();
  SE = &getAnalysis<ScalarEvolution>();

  AllocaBlock = &F.getEntryBlock();

  DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n');

  bool Modified = false;

  for (ScopDetection::iterator ScopIt = SD->begin(), ScopEnd = SD->end();
       ScopIt != ScopEnd; ++ScopIt) {
    const Region *R = *ScopIt;

    // Every step is executed, whatever the result of the previous ones.
    const bool MovedOperands = createIndependentBlocks(R);
    const bool RemovedDeadCode = eliminateDeadCode(R);
    // This may change the RegionTree.
    const bool SplitExit = splitExitBlock(const_cast<Region *>(R));

    if (MovedOperands || RemovedDeadCode || SplitExit)
      Modified = true;
  }

  DEBUG(dbgs() << "Before Scalar to Array------->\n");
  DEBUG(F.dump());

  for (ScopDetection::iterator ScopIt = SD->begin(), ScopEnd = SD->end();
       ScopIt != ScopEnd; ++ScopIt) {
    if (translateScalarToArray(*ScopIt))
      Modified = true;
  }

  DEBUG(dbgs() << "After Independent Blocks------------->\n");
  DEBUG(F.dump());

  verifyAnalysis();

  return Modified;
}

void IndependentBlocks::verifyAnalysis() const {
  for (ScopDetection::const_iterator I = SD->begin(), E = SD->end(); I != E;
       ++I)
    verifyScop(*I);
}

// Assert that every block of region R is independent. The check lives
// entirely inside assert(), so it is only evaluated in builds where
// assertions are enabled.
void IndependentBlocks::verifyScop(const Region *R) const {
  assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks");
}

// Definition of the pass ID and the reference to it that is exported in the
// polly namespace.
char IndependentBlocks::ID = 0;
char &polly::IndependentBlocksID = IndependentBlocks::ID;

// Factory function: returns a newly allocated IndependentBlocks pass.
Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); }

// Pass registration under the command-line name "polly-independent". The
// dependencies listed here are the four analyses that getAnalysisUsage()
// marks as required.
INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent",
                      "Polly - Create independent blocks", false, false);
INITIALIZE_PASS_DEPENDENCY(LoopInfo);
INITIALIZE_PASS_DEPENDENCY(RegionInfo);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_END(IndependentBlocks, "polly-independent",
                    "Polly - Create independent blocks", false, false)
OpenPOWER on IntegriCloud