//===----- ScopDetection.cpp  - Detect Scops --------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Detect the maximal Scops of a function.
//
// A static control part (Scop) is a subgraph of the control flow graph (CFG)
// that only has statically known control flow and can therefore be described
// within the polyhedral model.
//
// Every Scop fulfills these restrictions:
//
// * It is a single entry single exit region
//
// * Only affine linear bounds in the loops
//
// Every natural loop in a Scop must have a number of loop iterations that can
// be described as an affine linear function in surrounding loop iterators or
// parameters. (A parameter is a scalar that does not change its value during
// execution of the Scop).
//
// * Only comparisons of affine linear expressions in conditions
//
// * All loops and conditions perfectly nested
//
// The control flow needs to be structured such that it could be written using
// just 'for' and 'if' statements, without the need for any 'goto', 'break' or
// 'continue'.
//
// * Side effect free function calls
//
// Only function calls and intrinsics that do not have side effects are allowed
// (readnone).
//
// The Scop detection finds the largest Scops by checking if the largest
// region is a Scop. If this is not the case, its canonical subregions are
// checked until a region is a Scop. The detection then tries to extend this
// Scop by creating a larger non canonical region.
// //===----------------------------------------------------------------------===// #include "polly/ScopDetection.h" #include "polly/LinkAllPasses.h" #include "polly/Support/ScopHelper.h" #include "polly/Support/AffineSCEVIterator.h" #include "llvm/LLVMContext.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Assembly/Writer.h" #define DEBUG_TYPE "polly-detect" #include "llvm/Support/Debug.h" using namespace llvm; using namespace polly; static cl::opt OnlyFunction("polly-detect-only", cl::desc("Only detect scops in function"), cl::Hidden, cl::value_desc("The function name to detect scops in"), cl::ValueRequired, cl::init("")); //===----------------------------------------------------------------------===// // Statistics. STATISTIC(ValidRegion, "Number of regions that a valid part of Scop"); #define BADSCOP_STAT(NAME, DESC) STATISTIC(Bad##NAME##ForScop, \ "Number of bad regions for Scop: "\ DESC) #define STATSCOP(NAME); assert(!Context.Verifying && #NAME); \ if (!Context.Verifying) ++Bad##NAME##ForScop; #define INVALID(NAME, MESSAGE) \ do { \ std::string Buf; \ raw_string_ostream fmt(Buf); \ fmt << MESSAGE; \ fmt.flush(); \ LastFailure = Buf; \ DEBUG(dbgs() << MESSAGE); \ DEBUG(dbgs() << "\n"); \ STATSCOP(NAME); \ return false; \ } while (0); BADSCOP_STAT(CFG, "CFG too complex"); BADSCOP_STAT(IndVar, "Non canonical induction variable in loop"); BADSCOP_STAT(LoopBound, "Loop bounds can not be computed"); BADSCOP_STAT(FuncCall, "Function call with side effects appeared"); BADSCOP_STAT(AffFunc, "Expression not affine"); BADSCOP_STAT(Scalar, "Found scalar dependency"); BADSCOP_STAT(Alias, "Found base address alias"); BADSCOP_STAT(SimpleRegion, "Region not simple"); BADSCOP_STAT(Other, "Others"); //===----------------------------------------------------------------------===// // ScopDetection. 
namespace SCEVType { enum TYPE {INT, PARAM, IV, INVALID}; } /// Check if a SCEV is valid in a SCoP. struct SCEVValidator : public SCEVVisitor { private: const Region *R; ScalarEvolution &SE; const Value **BaseAddress; public: static bool isValid(const Region *R, const SCEV *Scev, ScalarEvolution &SE, const Value **BaseAddress = NULL) { if (isa(Scev)) return false; SCEVValidator Validator(R, SE, BaseAddress); return Validator.visit(Scev) != SCEVType::INVALID; } SCEVValidator(const Region *R, ScalarEvolution &SE, const Value **BaseAddress) : R(R), SE(SE), BaseAddress(BaseAddress) {}; SCEVType::TYPE visitConstant(const SCEVConstant *Constant) { return SCEVType::INT; } SCEVType::TYPE visitTruncateExpr(const SCEVTruncateExpr* Expr) { SCEVType::TYPE Op = visit(Expr->getOperand()); // We cannot represent this as a affine expression yet. If it is constant // during Scop execution treat this as a parameter, otherwise bail out. if (Op == SCEVType::INT || Op == SCEVType::PARAM) return SCEVType::PARAM; return SCEVType::INVALID; } SCEVType::TYPE visitZeroExtendExpr(const SCEVZeroExtendExpr * Expr) { SCEVType::TYPE Op = visit(Expr->getOperand()); // We cannot represent this as a affine expression yet. If it is constant // during Scop execution treat this as a parameter, otherwise bail out. if (Op == SCEVType::INT || Op == SCEVType::PARAM) return SCEVType::PARAM; return SCEVType::INVALID; } SCEVType::TYPE visitSignExtendExpr(const SCEVSignExtendExpr* Expr) { // Assuming the value is signed, a sign extension is basically a noop. // TODO: Reconsider this as soon as we support unsigned values. return visit(Expr->getOperand()); } SCEVType::TYPE visitAddExpr(const SCEVAddExpr* Expr) { SCEVType::TYPE Return = SCEVType::INT; for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { SCEVType::TYPE OpType = visit(Expr->getOperand(i)); Return = std::max(Return, OpType); } // TODO: Check for NSW and NUW. 
return Return; } SCEVType::TYPE visitMulExpr(const SCEVMulExpr* Expr) { SCEVType::TYPE Return = SCEVType::INT; for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { SCEVType::TYPE OpType = visit(Expr->getOperand(i)); if (OpType == SCEVType::INVALID) return SCEVType::INVALID; if (OpType == SCEVType::IV) { if (Return == SCEVType::PARAM || Return == SCEVType::IV) return SCEVType::INVALID; Return = OpType; continue; } if (OpType == SCEVType::PARAM) { if (Return == SCEVType::PARAM) return SCEVType::INVALID; Return = SCEVType::PARAM; continue; } // OpType == SCEVType::INT, no need to change anything. } // TODO: Check for NSW and NUW. return Return; } SCEVType::TYPE visitUDivExpr(const SCEVUDivExpr* Expr) { // We do not yet support unsigned operations. return SCEVType::INVALID; } SCEVType::TYPE visitAddRecExpr(const SCEVAddRecExpr* Expr) { if (!Expr->isAffine()) return SCEVType::INVALID; SCEVType::TYPE Start = visit(Expr->getStart()); if (Start == SCEVType::INVALID) return Start; SCEVType::TYPE Recurrence = visit(Expr->getStepRecurrence(SE)); if (Recurrence != SCEVType::INT) return SCEVType::INVALID; return SCEVType::PARAM; } SCEVType::TYPE visitSMaxExpr(const SCEVSMaxExpr* Expr) { SCEVType::TYPE Return = SCEVType::INT; for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { SCEVType::TYPE OpType = visit(Expr->getOperand(i)); if (OpType == SCEVType::INVALID) return SCEVType::INVALID; if (OpType == SCEVType::PARAM) Return = SCEVType::PARAM; } return Return; } SCEVType::TYPE visitUMaxExpr(const SCEVUMaxExpr* Expr) { // We do not yet support unsigned operations. If 'Expr' is constant // during Scop execution treat this as a parameter, otherwise bail out. 
for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { SCEVType::TYPE OpType = visit(Expr->getOperand(i)); if (OpType != SCEVType::INT && OpType != SCEVType::PARAM) return SCEVType::PARAM; } return SCEVType::PARAM; } SCEVType::TYPE visitUnknown(const SCEVUnknown* Expr) { if (Instruction *I = dyn_cast(Expr->getValue())) if (R->contains(I)) return SCEVType::INVALID; return SCEVType::PARAM; } }; bool ScopDetection::isMaxRegionInScop(const Region &R) const { // The Region is valid only if it could be found in the set. return ValidRegions.count(&R); } std::string ScopDetection::regionIsInvalidBecause(const Region *R) const { if (!InvalidRegions.count(R)) return ""; return InvalidRegions.find(R)->second; } bool ScopDetection::isValidAffineFunction(const SCEV *S, Region &RefRegion, Value **BasePtr) const { assert(S && "S must not be null!"); bool isMemoryAccess = (BasePtr != 0); if (isMemoryAccess) *BasePtr = 0; DEBUG(dbgs() << "Checking " << *S << " ... "); if (isa(S)) { DEBUG(dbgs() << "Non Affine: SCEV could not be computed\n"); return false; } for (AffineSCEVIterator I = affine_begin(S, SE), E = affine_end(); I != E; ++I) { // The constant part must be a SCEVConstant. // TODO: support sizeof in coefficient. if (!isa(I->second)) { DEBUG(dbgs() << "Non Affine: Right hand side is not constant\n"); return false; } const SCEV *Var = I->first; // A constant offset is affine. if(isa(Var)) continue; // Memory accesses are allowed to have a base pointer. if (Var->getType()->isPointerTy()) { if (!isMemoryAccess) { DEBUG(dbgs() << "Non Affine: Pointer in non memory access\n"); return false; } assert(I->second->isOne() && "Only one as pointer coefficient allowed.\n"); const SCEVUnknown *BaseAddr = dyn_cast(Var); if (!BaseAddr || isa(BaseAddr->getValue())){ DEBUG(dbgs() << "Cannot handle base: " << *Var << "\n"); return false; } // BaseAddr must be invariant in Scop. 
if (!isParameter(BaseAddr, RefRegion, *LI, *SE)) { DEBUG(dbgs() << "Non Affine: Base address not invariant in SCoP\n"); return false; } assert(*BasePtr == 0 && "Found second base pointer.\n"); *BasePtr = BaseAddr->getValue(); continue; } if (isParameter(Var, RefRegion, *LI, *SE) || isIndVar(Var, RefRegion, *LI, *SE)) continue; DEBUG(dbgs() << "Non Affine: " ; Var->print(dbgs()); dbgs() << " is neither parameter nor induction variable\n"); return false; } DEBUG(dbgs() << " is affine.\n"); return !isMemoryAccess || (*BasePtr != 0); } bool ScopDetection::isValidCFG(BasicBlock &BB, DetectionContext &Context) const { Region &RefRegion = Context.CurRegion; TerminatorInst *TI = BB.getTerminator(); // Return instructions are only valid if the region is the top level region. if (isa(TI) && !RefRegion.getExit() && TI->getNumOperands() == 0) return true; BranchInst *Br = dyn_cast(TI); if (!Br) INVALID(CFG, "Non branch instruction terminates BB: " + BB.getNameStr()); if (Br->isUnconditional()) return true; Value *Condition = Br->getCondition(); // UndefValue is not allowed as condition. if (isa(Condition)) INVALID(AffFunc, "Condition based on 'undef' value in BB: " + BB.getNameStr()); // Only Constant and ICmpInst are allowed as condition. if (!(isa(Condition) || isa(Condition))) INVALID(AffFunc, "Condition in BB '" + BB.getNameStr() + "' neither " "constant nor an icmp instruction"); // Allow perfectly nested conditions. assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); if (ICmpInst *ICmp = dyn_cast(Condition)) { // Unsigned comparisons are not allowed. They trigger overflow problems // in the code generation. // // TODO: This is not sufficient and just hides bugs. However it does pretty // well. if(ICmp->isUnsigned()) return false; // Are both operands of the ICmp affine? 
if (isa(ICmp->getOperand(0)) || isa(ICmp->getOperand(1))) INVALID(AffFunc, "undef operand in branch at BB: " + BB.getNameStr()); const SCEV *ScevLHS = SE->getSCEV(ICmp->getOperand(0)); const SCEV *ScevRHS = SE->getSCEV(ICmp->getOperand(1)); bool affineLHS = SCEVValidator::isValid(&Context.CurRegion, ScevLHS, *SE); bool affineRHS = SCEVValidator::isValid(&Context.CurRegion, ScevRHS, *SE); if (!affineLHS || !affineRHS) INVALID(AffFunc, "Non affine branch in BB: " + BB.getNameStr()); } // Allow loop exit conditions. Loop *L = LI->getLoopFor(&BB); if (L && L->getExitingBlock() == &BB) return true; // Allow perfectly nested conditions. Region *R = RI->getRegionFor(&BB); if (R->getEntry() != &BB) INVALID(CFG, "Not well structured condition at BB: " + BB.getNameStr()); return true; } bool ScopDetection::isValidCallInst(CallInst &CI) { if (CI.mayHaveSideEffects() || CI.doesNotReturn()) return false; if (CI.doesNotAccessMemory()) return true; Function *CalledFunction = CI.getCalledFunction(); // Indirect calls are not supported. if (CalledFunction == 0) return false; // TODO: Intrinsics. return false; } bool ScopDetection::isValidMemoryAccess(Instruction &Inst, DetectionContext &Context) const { Value *Ptr = getPointerOperand(Inst), *BasePtr; const SCEV *AccessFunction = SE->getSCEV(Ptr); if (!isValidAffineFunction(AccessFunction, Context.CurRegion, &BasePtr)) INVALID(AffFunc, "Bad memory address " << *AccessFunction); // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions // created by IndependentBlocks Pass. if (isa(BasePtr)) INVALID(Other, "Find bad intToptr prt: " << *BasePtr); // Check if the base pointer of the memory access does alias with // any other pointer. This cannot be handled at the moment. 
AliasSet &AS = Context.AST.getAliasSetForPointer(BasePtr, AliasAnalysis::UnknownSize, Inst.getMetadata(LLVMContext::MD_tbaa)); if (!AS.isMustAlias()) { DEBUG(dbgs() << "Bad pointer alias found:" << *BasePtr << "\nAS:\n" << AS); // STATSCOP triggers an assertion if we are in verifying mode. // This is generally good to check that we do not change the SCoP after we // run the SCoP detection and consequently to ensure that we can still // represent that SCoP. However, in case of aliasing this does not work. // The independent blocks pass may create memory references which seem to // alias, if -basicaa is not available. They actually do not. As we do not // not know this and we would fail here if we verify it. if (!Context.Verifying) { STATSCOP(Alias); } return false; } return true; } bool ScopDetection::hasScalarDependency(Instruction &Inst, Region &RefRegion) const { for (Instruction::use_iterator UI = Inst.use_begin(), UE = Inst.use_end(); UI != UE; ++UI) if (Instruction *Use = dyn_cast(*UI)) if (!RefRegion.contains(Use->getParent())) { // DirtyHack 1: PHINode user outside the Scop is not allow, if this // PHINode is induction variable, the scalar to array transform may // break it and introduce a non-indvar PHINode, which is not allow in // Scop. // This can be fix by: // Introduce a IndependentBlockPrepare pass, which translate all // PHINodes not in Scop to array. // The IndependentBlockPrepare pass can also split the entry block of // the function to hold the alloca instruction created by scalar to // array. and split the exit block of the Scop so the new create load // instruction for escape users will not break other Scops. if (isa(Use)) return true; } return false; } bool ScopDetection::isValidInstruction(Instruction &Inst, DetectionContext &Context) const { // Only canonical IVs are allowed. if (PHINode *PN = dyn_cast(&Inst)) if (!isIndVar(PN, LI)) INVALID(IndVar, "Non canonical PHI node: " << Inst); // Scalar dependencies are not allowed. 
if (hasScalarDependency(Inst, Context.CurRegion)) INVALID(Scalar, "Scalar dependency found: " << Inst); // We only check the call instruction but not invoke instruction. if (CallInst *CI = dyn_cast(&Inst)) { if (isValidCallInst(*CI)) return true; INVALID(FuncCall, "Call instruction: " << Inst); } if (!Inst.mayWriteToMemory() && !Inst.mayReadFromMemory()) { // Handle cast instruction. if (isa(Inst) || isa(Inst)) INVALID(Other, "Cast instruction: " << Inst); if (isa(Inst)) INVALID(Other, "Alloca instruction: " << Inst); return true; } // Check the access function. if (isa(Inst) || isa(Inst)) return isValidMemoryAccess(Inst, Context); // We do not know this instruction, therefore we assume it is invalid. INVALID(Other, "Unknown instruction: " << Inst); } bool ScopDetection::isValidBasicBlock(BasicBlock &BB, DetectionContext &Context) const { if (!isValidCFG(BB, Context)) return false; // Check all instructions, except the terminator instruction. for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (!isValidInstruction(*I, Context)) return false; Loop *L = LI->getLoopFor(&BB); if (L && L->getHeader() == &BB && !isValidLoop(L, Context)) return false; return true; } bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { PHINode *IndVar = L->getCanonicalInductionVariable(); // No canonical induction variable. if (!IndVar) INVALID(IndVar, "No canonical IV at loop header: " << L->getHeader()->getNameStr()); // Is the loop count affine? 
const SCEV *LoopCount = SE->getBackedgeTakenCount(L); if (!SCEVValidator::isValid(&Context.CurRegion, LoopCount, *SE)) INVALID(LoopBound, "Non affine loop bound '" << *LoopCount << "' in loop: " << L->getHeader()->getNameStr()); return true; } Region *ScopDetection::expandRegion(Region &R) { Region *CurrentRegion = &R; Region *TmpRegion = R.getExpandedRegion(); DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); while (TmpRegion) { DetectionContext Context(*TmpRegion, *AA, false /*verifying*/); DEBUG(dbgs() << "\t\tTrying " << TmpRegion->getNameStr() << "\n"); if (!allBlocksValid(Context)) break; if (isValidExit(Context)) { if (CurrentRegion != &R) delete CurrentRegion; CurrentRegion = TmpRegion; } Region *TmpRegion2 = TmpRegion->getExpandedRegion(); if (TmpRegion != &R && TmpRegion != CurrentRegion) delete TmpRegion; TmpRegion = TmpRegion2; } if (&R == CurrentRegion) return NULL; DEBUG(dbgs() << "\tto " << CurrentRegion->getNameStr() << "\n"); return CurrentRegion; } void ScopDetection::findScops(Region &R) { DetectionContext Context(R, *AA, false /*verifying*/); if (isValidRegion(Context)) { ++ValidRegion; ValidRegions.insert(&R); return; } InvalidRegions[&R] = LastFailure; for (Region::iterator I = R.begin(), E = R.end(); I != E; ++I) findScops(**I); // Try to expand regions. // // As the region tree normally only contains canonical regions, non canonical // regions that form a Scop are not found. Therefore, those non canonical // regions are checked by expanding the canonical ones. std::vector ToExpand; for (Region::iterator I = R.begin(), E = R.end(); I != E; ++I) ToExpand.push_back(*I); for (std::vector::iterator RI = ToExpand.begin(), RE = ToExpand.end(); RI != RE; ++RI) { Region *CurrentRegion = *RI; // Skip invalid regions. Regions may become invalid, if they are element of // an already expanded region. 
if (ValidRegions.find(CurrentRegion) == ValidRegions.end()) continue; Region *ExpandedR = expandRegion(*CurrentRegion); if (!ExpandedR) continue; R.addSubRegion(ExpandedR, true); ValidRegions.insert(ExpandedR); ValidRegions.erase(CurrentRegion); for (Region::iterator I = ExpandedR->begin(), E = ExpandedR->end(); I != E; ++I) ValidRegions.erase(*I); } } bool ScopDetection::allBlocksValid(DetectionContext &Context) const { Region &R = Context.CurRegion; for (Region::block_iterator I = R.block_begin(), E = R.block_end(); I != E; ++I) if (!isValidBasicBlock(*(I->getNodeAs()), Context)) return false; return true; } bool ScopDetection::isValidExit(DetectionContext &Context) const { Region &R = Context.CurRegion; // PHI nodes are not allowed in the exit basic block. if (BasicBlock *Exit = R.getExit()) { BasicBlock::iterator I = Exit->begin(); if (I != Exit->end() && isa (*I)) INVALID(Other, "PHI node in exit BB"); } return true; } bool ScopDetection::isValidRegion(DetectionContext &Context) const { Region &R = Context.CurRegion; DEBUG(dbgs() << "Checking region: " << R.getNameStr() << "\n\t"); // The toplevel region is no valid region. if (!R.getParent()) { DEBUG(dbgs() << "Top level region is invalid"; dbgs() << "\n"); return false; } // SCoP can not contains the entry block of the function, because we need // to insert alloca instruction there when translate scalar to array. if (R.getEntry() == &(R.getEntry()->getParent()->getEntryBlock())) INVALID(Other, "Region containing entry block of function is invalid!"); // Only a simple region is allowed. 
if (!R.isSimple()) INVALID(SimpleRegion, "Region not simple: " << R.getNameStr()); if (!allBlocksValid(Context)) return false; if (!isValidExit(Context)) return false; DEBUG(dbgs() << "OK\n"); return true; } bool ScopDetection::isValidFunction(llvm::Function &F) { return !InvalidFunctions.count(&F); } bool ScopDetection::runOnFunction(llvm::Function &F) { AA = &getAnalysis(); SE = &getAnalysis(); LI = &getAnalysis(); RI = &getAnalysis(); Region *TopRegion = RI->getTopLevelRegion(); releaseMemory(); if (OnlyFunction != "" && F.getNameStr() != OnlyFunction) return false; if(!isValidFunction(F)) return false; findScops(*TopRegion); return false; } void polly::ScopDetection::verifyRegion(const Region &R) const { assert(isMaxRegionInScop(R) && "Expect R is a valid region."); DetectionContext Context(const_cast(R), *AA, true /*verifying*/); isValidRegion(Context); } void polly::ScopDetection::verifyAnalysis() const { for (RegionSet::const_iterator I = ValidRegions.begin(), E = ValidRegions.end(); I != E; ++I) verifyRegion(**I); } void ScopDetection::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); // We also need AA and RegionInfo when we are verifying analysis. AU.addRequiredTransitive(); AU.addRequiredTransitive(); AU.setPreservesAll(); } void ScopDetection::print(raw_ostream &OS, const Module *) const { for (RegionSet::const_iterator I = ValidRegions.begin(), E = ValidRegions.end(); I != E; ++I) OS << "Valid Region for Scop: " << (*I)->getNameStr() << '\n'; OS << "\n"; } void ScopDetection::releaseMemory() { ValidRegions.clear(); InvalidRegions.clear(); // Do not clear the invalid function set. 
} char ScopDetection::ID = 0; INITIALIZE_PASS_BEGIN(ScopDetection, "polly-detect", "Polly - Detect static control parts (SCoPs)", false, false) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) INITIALIZE_PASS_DEPENDENCY(RegionInfo) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(ScopDetection, "polly-detect", "Polly - Detect static control parts (SCoPs)", false, false) Pass *polly::createScopDetectionPass() { return new ScopDetection(); }