//===- ScopBuilder.cpp ----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the Scops detected by the SCoP
// detection derived from their LLVM-IR code.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopBuilder.h"
#include "polly/Options.h"
#include "polly/ScopDetection.h"
#include "polly/ScopDetectionDiagnostic.h"
#include "polly/ScopInfo.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "polly/Support/VirtualInstruction.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <string>
#include <tuple>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(ScopFound, "Number of valid Scops");
STATISTIC(RichScopFound, "Number of Scops containing a loop");
STATISTIC(InfeasibleScops,
          "Number of SCoPs with statically infeasible context.");

bool polly::ModelReadOnlyScalars;

static cl::opt<bool, true> XModelReadOnlyScalars(
    "polly-analyze-read-only-scalars",
    cl::desc("Model read-only scalar values in the scop description"),
    cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore,
    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> UnprofitableScalarAccs(
    "polly-unprofitable-scalar-accs",
    cl::desc("Count statements with scalar accesses as not optimizable"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> DetectFortranArrays(
    "polly-detect-fortran-arrays",
    cl::desc("Detect Fortran arrays and use this for code generation"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    DetectReductions("polly-detect-reductions",
                     cl::desc("Detect and exploit reductions"), cl::Hidden,
                     cl::ZeroOrMore, cl::init(true), cl::cat(PollyCategory));

// Multiplicative reductions can be disabled separately as these kind of
// operations can overflow easily. Additive reductions and bit operations
// are in contrast pretty stable.
static cl::opt<bool> DisableMultiplicativeReductions(
    "polly-disable-multiplicative-reductions",
    cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore,
    cl::init(false), cl::cat(PollyCategory));

enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores };

static cl::opt<GranularityChoice> StmtGranularity(
    "polly-stmt-granularity",
    cl::desc(
        "Algorithm to use for splitting basic blocks into multiple statements"),
    cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb",
                          "One statement per basic block"),
               clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep",
                          "Scalar independence heuristic"),
               clEnumValN(GranularityChoice::Stores, "store",
                          "Store-level granularity")),
    cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory));

void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI,
                                   Region *NonAffineSubRegion,
                                   bool IsExitBlock) {
  // PHI nodes that are in the exit block of the region, hence if IsExitBlock
  // is true, are not modeled as ordinary PHI nodes as they are not part of the
  // region. However, we model the operands in the predecessor blocks that are
  // part of the region as regular scalar accesses.

  // If we can synthesize a PHI we can skip it, however only if it is in
  // the region. If it is not it can only be in the exit block of the region.
  // In this case we model the operands but not the PHI itself.
  auto *Scope = LI.getLoopFor(PHI->getParent());
  if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope))
    return;

  // PHI nodes are modeled as if they had been demoted prior to the SCoP
  // detection. Hence, the PHI is a load of a new memory location in which the
  // incoming value was written at the end of the incoming basic block.
  bool OnlyNonAffineSubRegionOperands = true;
  for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) {
    Value *Op = PHI->getIncomingValue(u);
    BasicBlock *OpBB = PHI->getIncomingBlock(u);
    ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u));

    // Do not build PHI dependences inside a non-affine subregion, but make
    // sure that the necessary scalar values are still made available.
    if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) {
      auto *OpInst = dyn_cast<Instruction>(Op);
      if (!OpInst || !NonAffineSubRegion->contains(OpInst))
        ensureValueRead(Op, OpStmt);
      continue;
    }

    OnlyNonAffineSubRegionOperands = false;
    ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock);
  }

  if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) {
    addPHIReadAccess(PHIStmt, PHI);
  }
}

void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt,
                                         Instruction *Inst) {
  assert(!isa<PHINode>(Inst));

  // Pull-in required operands.
  for (Use &Op : Inst->operands())
    ensureValueRead(Op.get(), UserStmt);
}

void ScopBuilder::buildEscapingDependences(Instruction *Inst) {
  // Check for uses of this instruction outside the scop. Because we do not
  // iterate over such instructions and therefore did not "ensure" the
  // existence of a write, we must determine such uses here.
  if (scop->isEscaping(Inst))
    ensureValueWrite(Inst);
}

/// Check that a value is a Fortran Array descriptor.
///
/// We check if V has the following structure:
/// %"struct.array1_real(kind=8)" = type { i8*, i<xx>, i<xx>,
///                                        [<num> x %struct.descriptor_dimension] }
///
///
/// %struct.descriptor_dimension = type { i<xx>, i<xx>, i<xx> }
///
/// 1. V's type name starts with "struct.array"
/// 2. V's type has layout as shown.
/// 3. Final member of V's type has name "struct.descriptor_dimension",
/// 4. "struct.descriptor_dimension" has layout as shown.
/// 5. Consistent use of i<xx> where <xx> is some fixed integer number.
///
/// We are interested in such types since this is the code that dragonegg
/// generates for Fortran array descriptors.
///
/// @param V the Value to be checked.
///
/// @returns True if V is a Fortran array descriptor, False otherwise.
bool isFortranArrayDescriptor(Value *V) {
  PointerType *PTy = dyn_cast<PointerType>(V->getType());

  if (!PTy)
    return false;

  Type *Ty = PTy->getElementType();
  assert(Ty && "Ty expected to be initialized");
  auto *StructArrTy = dyn_cast<StructType>(Ty);

  if (!(StructArrTy && StructArrTy->hasName()))
    return false;

  if (!StructArrTy->getName().startswith("struct.array"))
    return false;

  if (StructArrTy->getNumElements() != 4)
    return false;

  const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements();

  // i8* match
  if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext()))
    return false;

  // Get a reference to the int type and check that all the members
  // share the same int type
  Type *IntTy = ArrMemberTys[1];
  if (ArrMemberTys[2] != IntTy)
    return false;

  // type: [<num> x %struct.descriptor_dimension]
  ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]);
  if (!DescriptorDimArrayTy)
    return false;

  // type: %struct.descriptor_dimension := type { ixx, ixx, ixx }
  StructType *DescriptorDimTy =
      dyn_cast<StructType>(DescriptorDimArrayTy->getElementType());

  if (!(DescriptorDimTy && DescriptorDimTy->hasName()))
    return false;

  if (DescriptorDimTy->getName() != "struct.descriptor_dimension")
    return false;

  if (DescriptorDimTy->getNumElements() != 3)
    return false;

  for (auto MemberTy : DescriptorDimTy->elements()) {
    if (MemberTy != IntTy)
      return false;
  }

  return true;
}

Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) {
  // match: 4.1 & 4.2 store/load
  if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst))
    return nullptr;

  // match: 4
  if (Inst.getAlignment() != 8)
    return nullptr;

  Value *Address = Inst.getPointerOperand();

  const BitCastInst *Bitcast = nullptr;
  // [match: 3]
  if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) {
    Value *TypedMem = Slot->getPointerOperand();
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(TypedMem);
  } else {
    // match: 2
    Bitcast = dyn_cast<BitCastInst>(Address);
  }

  if (!Bitcast)
    return nullptr;

  auto *MallocMem = Bitcast->getOperand(0);

  // match: 1
  auto *MallocCall = dyn_cast<CallInst>(MallocMem);
  if (!MallocCall)
    return nullptr;

  Function *MallocFn = MallocCall->getCalledFunction();
  if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc"))
    return nullptr;

  // Find all uses of the malloc'd memory.
// We are looking for a "store" into a struct with the type being the Fortran // descriptor type for (auto user : MallocMem->users()) { /// match: 5 auto *MallocStore = dyn_cast(user); if (!MallocStore) continue; auto *DescriptorGEP = dyn_cast(MallocStore->getPointerOperand()); if (!DescriptorGEP) continue; // match: 5 auto DescriptorType = dyn_cast(DescriptorGEP->getSourceElementType()); if (!(DescriptorType && DescriptorType->hasName())) continue; Value *Descriptor = dyn_cast(DescriptorGEP->getPointerOperand()); if (!Descriptor) continue; if (!isFortranArrayDescriptor(Descriptor)) continue; return Descriptor; } return nullptr; } Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) { // match: 3 if (!isa(Inst) && !isa(Inst)) return nullptr; Value *Slot = Inst.getPointerOperand(); LoadInst *MemLoad = nullptr; // [match: 2] if (auto *SlotGEP = dyn_cast(Slot)) { // match: 1 MemLoad = dyn_cast(SlotGEP->getPointerOperand()); } else { // match: 1 MemLoad = dyn_cast(Slot); } if (!MemLoad) return nullptr; auto *BitcastOperator = dyn_cast(MemLoad->getPointerOperand()); if (!BitcastOperator) return nullptr; Value *Descriptor = dyn_cast(BitcastOperator->getOperand(0)); if (!Descriptor) return nullptr; if (!isFortranArrayDescriptor(Descriptor)) return nullptr; return Descriptor; } bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) { Value *Val = Inst.getValueOperand(); Type *ElementType = Val->getType(); Value *Address = Inst.getPointerOperand(); const SCEV *AccessFunction = SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); const SCEVUnknown *BasePointer = dyn_cast(SE.getPointerBase(AccessFunction)); enum MemoryAccess::AccessType AccType = isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; if (auto *BitCast = dyn_cast(Address)) { auto *Src = BitCast->getOperand(0); auto *SrcTy = Src->getType(); auto *DstTy = BitCast->getType(); // Do not try to delinearize non-sized (opaque) pointers. if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) || (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) { return false; } if (SrcTy->isPointerTy() && DstTy->isPointerTy() && DL.getTypeAllocSize(SrcTy->getPointerElementType()) == DL.getTypeAllocSize(DstTy->getPointerElementType())) Address = Src; } auto *GEP = dyn_cast(Address); if (!GEP) return false; std::vector Subscripts; std::vector Sizes; std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE); auto *BasePtr = GEP->getOperand(0); if (auto *BasePtrCast = dyn_cast(BasePtr)) BasePtr = BasePtrCast->getOperand(0); // Check for identical base pointers to ensure that we do not miss index // offsets that have been added before this GEP is applied. 
if (BasePtr != BasePointer->getValue()) return false; std::vector SizesSCEV; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); Loop *SurroundingLoop = Stmt->getSurroundingLoop(); for (auto *Subscript : Subscripts) { InvariantLoadsSetTy AccessILS; if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, &AccessILS)) return false; for (LoadInst *LInst : AccessILS) if (!ScopRIL.count(LInst)) return false; } if (Sizes.empty()) return false; SizesSCEV.push_back(nullptr); for (auto V : Sizes) SizesSCEV.push_back(SE.getSCEV( ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, true, Subscripts, SizesSCEV, Val); return true; } bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { if (!PollyDelinearize) return false; Value *Address = Inst.getPointerOperand(); Value *Val = Inst.getValueOperand(); Type *ElementType = Val->getType(); unsigned ElementSize = DL.getTypeAllocSize(ElementType); enum MemoryAccess::AccessType AccType = isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; const SCEV *AccessFunction = SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); const SCEVUnknown *BasePointer = dyn_cast(SE.getPointerBase(AccessFunction)); assert(BasePointer && "Could not find base pointer"); auto &InsnToMemAcc = scop->getInsnToMemAccMap(); auto AccItr = InsnToMemAcc.find(Inst); if (AccItr == InsnToMemAcc.end()) return false; std::vector Sizes = {nullptr}; Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), AccItr->second.Shape->DelinearizedSizes.end()); // In case only the element size is contained in the 'Sizes' array, the // access does not access a real multi-dimensional array. Hence, we allow // the normal single-dimensional access construction to handle this. if (Sizes.size() == 1) return false; // Remove the element size. This information is already provided by the // ElementSize parameter. In case the element size of this access and the // element size used for delinearization differs the delinearization is // incorrect. Hence, we invalidate the scop. // // TODO: Handle delinearization with differing element sizes. auto DelinearizedSize = cast(Sizes.back())->getAPInt().getSExtValue(); Sizes.pop_back(); if (ElementSize != DelinearizedSize) scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, true, AccItr->second.DelinearizedSubscripts, Sizes, Val); return true; } bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { auto *MemIntr = dyn_cast_or_null(Inst); if (MemIntr == nullptr) return false; auto *L = LI.getLoopFor(Inst->getParent()); auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L); assert(LengthVal); // Check if the length val is actually affine or if we overapproximate it InvariantLoadsSetTy AccessILS; const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); Loop *SurroundingLoop = Stmt->getSurroundingLoop(); bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, LengthVal, SE, &AccessILS); for (LoadInst *LInst : AccessILS) if (!ScopRIL.count(LInst)) LengthIsAffine = false; if (!LengthIsAffine) LengthVal = nullptr; auto *DestPtrVal = MemIntr->getDest(); assert(DestPtrVal); auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L); assert(DestAccFunc); // Ignore accesses to "NULL". 
// TODO: We could use this to optimize the region further, e.g., intersect // the context with // isl_set_complement(isl_set_params(getDomain())) // as we know it would be undefined to execute this instruction anyway. if (DestAccFunc->isZero()) return true; auto *DestPtrSCEV = dyn_cast(SE.getPointerBase(DestAccFunc)); assert(DestPtrSCEV); DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), IntegerType::getInt8Ty(DestPtrVal->getContext()), LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, Inst.getValueOperand()); auto *MemTrans = dyn_cast(MemIntr); if (!MemTrans) return true; auto *SrcPtrVal = MemTrans->getSource(); assert(SrcPtrVal); auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L); assert(SrcAccFunc); // Ignore accesses to "NULL". // TODO: See above TODO if (SrcAccFunc->isZero()) return true; auto *SrcPtrSCEV = dyn_cast(SE.getPointerBase(SrcAccFunc)); assert(SrcPtrSCEV); SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), IntegerType::getInt8Ty(SrcPtrVal->getContext()), LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, Inst.getValueOperand()); return true; } bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { auto *CI = dyn_cast_or_null(Inst); if (CI == nullptr) return false; if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI)) return true; bool ReadOnly = false; auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); auto *CalledFunction = CI->getCalledFunction(); switch (AA.getModRefBehavior(CalledFunction)) { case FMRB_UnknownModRefBehavior: llvm_unreachable("Unknown mod ref behaviour cannot be represented."); case FMRB_DoesNotAccessMemory: return true; case FMRB_DoesNotReadMemory: case FMRB_OnlyAccessesInaccessibleMem: case FMRB_OnlyAccessesInaccessibleOrArgMem: return false; case FMRB_OnlyReadsMemory: GlobalReads.emplace_back(Stmt, CI); return true; case FMRB_OnlyReadsArgumentPointees: ReadOnly = true; // Fall through case FMRB_OnlyAccessesArgumentPointees: { auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE; Loop *L = LI.getLoopFor(Inst->getParent()); for (const auto &Arg : CI->arg_operands()) { if (!Arg->getType()->isPointerTy()) continue; auto *ArgSCEV = SE.getSCEVAtScope(Arg, L); if (ArgSCEV->isZero()) continue; auto *ArgBasePtr = cast(SE.getPointerBase(ArgSCEV)); addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); } return true; } } return true; } void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) { Value *Address = Inst.getPointerOperand(); Value *Val = Inst.getValueOperand(); Type *ElementType = Val->getType(); enum MemoryAccess::AccessType AccType = isa(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; const SCEV *AccessFunction = SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); const SCEVUnknown *BasePointer = dyn_cast(SE.getPointerBase(AccessFunction)); assert(BasePointer && "Could not find base pointer"); AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer); // Check if the access depends on a loop contained in a non-affine subregion. 
bool isVariantInNonAffineLoop = false; SetVector Loops; findLoops(AccessFunction, Loops); for (const Loop *L : Loops) if (Stmt->contains(L)) { isVariantInNonAffineLoop = true; break; } InvariantLoadsSetTy AccessILS; Loop *SurroundingLoop = Stmt->getSurroundingLoop(); bool IsAffine = !isVariantInNonAffineLoop && isAffineExpr(&scop->getRegion(), SurroundingLoop, AccessFunction, SE, &AccessILS); const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); for (LoadInst *LInst : AccessILS) if (!ScopRIL.count(LInst)) IsAffine = false; if (!IsAffine && AccType == MemoryAccess::MUST_WRITE) AccType = MemoryAccess::MAY_WRITE; addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, IsAffine, {AccessFunction}, {nullptr}, Val); } void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { if (buildAccessMemIntrinsic(Inst, Stmt)) return; if (buildAccessCallInst(Inst, Stmt)) return; if (buildAccessMultiDimFixed(Inst, Stmt)) return; if (buildAccessMultiDimParam(Inst, Stmt)) return; buildAccessSingleDim(Inst, Stmt); } void ScopBuilder::buildAccessFunctions() { for (auto &Stmt : *scop) { if (Stmt.isBlockStmt()) { buildAccessFunctions(&Stmt, *Stmt.getBasicBlock()); continue; } Region *R = Stmt.getRegion(); for (BasicBlock *BB : R->blocks()) buildAccessFunctions(&Stmt, *BB, R); } // Build write accesses for values that are used after the SCoP. // The instructions defining them might be synthesizable and therefore not // contained in any statement, hence we iterate over the original instructions // to identify all escaping values. for (BasicBlock *BB : scop->getRegion().blocks()) { for (Instruction &Inst : *BB) buildEscapingDependences(&Inst); } } bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) { return !isa(Inst) && !isIgnoredIntrinsic(Inst) && !canSynthesize(Inst, *scop, &SE, L); } /// Generate a name for a statement. /// /// @param BB The basic block the statement will represent. /// @param BBIdx The index of the @p BB relative to other BBs/regions. /// @param Count The index of the created statement in @p BB. /// @param IsMain Whether this is the main of all statement for @p BB. If true, /// no suffix will be added. /// @param IsLast Uses a special indicator for the last statement of a BB. static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, bool IsMain, bool IsLast = false) { std::string Suffix; if (!IsMain) { if (UseInstructionNames) Suffix = '_'; if (IsLast) Suffix += "last"; else if (Count < 26) Suffix += 'a' + Count; else Suffix += std::to_string(Count); } return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames); } /// Generate a name for a statement that represents a non-affine subregion. /// /// @param R The region the statement will represent. /// @param RIdx The index of the @p R relative to other BBs/regions. 
static std::string makeStmtName(Region *R, long RIdx) { return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "", UseInstructionNames); } void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) { Loop *SurroundingLoop = LI.getLoopFor(BB); int Count = 0; long BBIdx = scop->getNextStmtIdx(); std::vector Instructions; for (Instruction &Inst : *BB) { if (shouldModelInst(&Inst, SurroundingLoop)) Instructions.push_back(&Inst); if (Inst.getMetadata("polly_split_after") || (SplitOnStore && isa(Inst))) { std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); Count++; Instructions.clear(); } } std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); } /// Is @p Inst an ordered instruction? /// /// An unordered instruction is an instruction, such that a sequence of /// unordered instructions can be permuted without changing semantics. Any /// instruction for which this is not always the case is ordered. static bool isOrderedInstruction(Instruction *Inst) { return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); } /// Join instructions to the same statement if one uses the scalar result of the /// other. static void joinOperandTree(EquivalenceClasses &UnionFind, ArrayRef ModeledInsts) { for (Instruction *Inst : ModeledInsts) { if (isa(Inst)) continue; for (Use &Op : Inst->operands()) { Instruction *OpInst = dyn_cast(Op.get()); if (!OpInst) continue; // Check if OpInst is in the BB and is a modeled instruction. auto OpVal = UnionFind.findValue(OpInst); if (OpVal == UnionFind.end()) continue; UnionFind.unionSets(Inst, OpInst); } } } /// Ensure that the order of ordered instructions does not change. /// /// If we encounter an ordered instruction enclosed in instructions belonging to /// a different statement (which might as well contain ordered instructions, but /// this is not tested here), join them. static void joinOrderedInstructions(EquivalenceClasses &UnionFind, ArrayRef ModeledInsts) { SetVector SeenLeaders; for (Instruction *Inst : ModeledInsts) { if (!isOrderedInstruction(Inst)) continue; Instruction *Leader = UnionFind.getLeaderValue(Inst); bool Inserted = SeenLeaders.insert(Leader); if (Inserted) continue; // Merge statements to close holes. Say, we have already seen statements A // and B, in this order. Then we see an instruction of A again and we would // see the pattern "A B A". This function joins all statements until the // only seen occurrence of A. for (Instruction *Prev : reverse(SeenLeaders)) { // Items added to 'SeenLeaders' are leaders, but may have lost their // leadership status when merged into another statement. Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back()); if (PrevLeader == Leader) break; UnionFind.unionSets(Prev, Leader); } } } /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for /// the incoming values from this block are executed after the PHI READ. /// /// Otherwise it could overwrite the incoming value from before the BB with the /// value for the next execution. This can happen if the PHI WRITE is added to /// the statement with the instruction that defines the incoming value (instead /// of the last statement of the same BB). To ensure that the PHI READ and WRITE /// are in order, we put both into the statement. PHI WRITEs are always executed /// after PHI READs when they are in the same statement. /// /// TODO: This is an overpessimization. 
We only have to ensure that the PHI /// WRITE is not put into a statement containing the PHI itself. That could also /// be done by /// - having all (strongly connected) PHIs in a single statement, /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only /// has a chance of being lifted before a PHI by being in a statement with a /// PHI that comes before in the basic block), or /// - when uniting statements, ensure that no (relevant) PHIs are overtaken. static void joinOrderedPHIs(EquivalenceClasses &UnionFind, ArrayRef ModeledInsts) { for (Instruction *Inst : ModeledInsts) { PHINode *PHI = dyn_cast(Inst); if (!PHI) continue; int Idx = PHI->getBasicBlockIndex(PHI->getParent()); if (Idx < 0) continue; Instruction *IncomingVal = dyn_cast(PHI->getIncomingValue(Idx)); if (!IncomingVal) continue; UnionFind.unionSets(PHI, IncomingVal); } } void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { Loop *L = LI.getLoopFor(BB); // Extracting out modeled instructions saves us from checking // shouldModelInst() repeatedly. SmallVector ModeledInsts; EquivalenceClasses UnionFind; Instruction *MainInst = nullptr; for (Instruction &Inst : *BB) { if (!shouldModelInst(&Inst, L)) continue; ModeledInsts.push_back(&Inst); UnionFind.insert(&Inst); // When a BB is split into multiple statements, the main statement is the // one containing the 'main' instruction. We select the first instruction // that is unlikely to be removed (because it has side-effects) as the main // one. It is used to ensure that at least one statement from the bb has the // same name as with -polly-stmt-granularity=bb. if (!MainInst && (isa(Inst) || (isa(Inst) && !isa(Inst)))) MainInst = &Inst; } joinOperandTree(UnionFind, ModeledInsts); joinOrderedInstructions(UnionFind, ModeledInsts); joinOrderedPHIs(UnionFind, ModeledInsts); // The list of instructions for statement (statement represented by the leader // instruction). The order of statements instructions is reversed such that // the epilogue is first. This makes it easier to ensure that the epilogue is // the last statement. MapVector> LeaderToInstList; // Collect the instructions of all leaders. UnionFind's member iterator // unfortunately are not in any specific order. for (Instruction &Inst : reverse(*BB)) { auto LeaderIt = UnionFind.findLeader(&Inst); if (LeaderIt == UnionFind.member_end()) continue; std::vector &InstList = LeaderToInstList[*LeaderIt]; InstList.push_back(&Inst); } // Finally build the statements. int Count = 0; long BBIdx = scop->getNextStmtIdx(); bool MainFound = false; for (auto &Instructions : reverse(LeaderToInstList)) { std::vector &InstList = Instructions.second; // If there is no main instruction, make the first statement the main. bool IsMain; if (MainInst) IsMain = std::find(InstList.begin(), InstList.end(), MainInst) != InstList.end(); else IsMain = (Count == 0); if (IsMain) MainFound = true; std::reverse(InstList.begin(), InstList.end()); std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); scop->addScopStmt(BB, Name, L, std::move(InstList)); Count += 1; } // Unconditionally add an epilogue (last statement). It contains no // instructions, but holds the PHI write accesses for successor basic blocks, // if the incoming value is not defined in another statement if the same BB. // The epilogue will be removed if no PHIWrite is added to it. 
std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true); scop->addScopStmt(BB, EpilogueName, L, {}); } void ScopBuilder::buildStmts(Region &SR) { if (scop->isNonAffineSubRegion(&SR)) { std::vector Instructions; Loop *SurroundingLoop = getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); for (Instruction &Inst : *SR.getEntry()) if (shouldModelInst(&Inst, SurroundingLoop)) Instructions.push_back(&Inst); long RIdx = scop->getNextStmtIdx(); std::string Name = makeStmtName(&SR, RIdx); scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions); return; } for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) if (I->isSubRegion()) buildStmts(*I->getNodeAs()); else { BasicBlock *BB = I->getNodeAs(); switch (StmtGranularity) { case GranularityChoice::BasicBlocks: buildSequentialBlockStmts(BB); break; case GranularityChoice::ScalarIndependence: buildEqivClassBlockStmts(BB); break; case GranularityChoice::Stores: buildSequentialBlockStmts(BB, true); break; } } } void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB, Region *NonAffineSubRegion) { assert( Stmt && "The exit BB is the only one that cannot be represented by a statement"); assert(Stmt->represents(&BB)); // We do not build access functions for error blocks, as they may contain // instructions we can not model. if (isErrorBlock(BB, scop->getRegion(), LI, DT)) return; auto BuildAccessesForInst = [this, Stmt, NonAffineSubRegion](Instruction *Inst) { PHINode *PHI = dyn_cast(Inst); if (PHI) buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false); if (auto MemInst = MemAccInst::dyn_cast(*Inst)) { assert(Stmt && "Cannot build access function in non-existing statement"); buildMemoryAccess(MemInst, Stmt); } // PHI nodes have already been modeled above and TerminatorInsts that are // not part of a non-affine subregion are fully modeled and regenerated // from the polyhedral domains. Hence, they do not need to be modeled as // explicit data dependences. if (!PHI) buildScalarDependences(Stmt, Inst); }; const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); bool IsEntryBlock = (Stmt->getEntryBlock() == &BB); if (IsEntryBlock) { for (Instruction *Inst : Stmt->getInstructions()) BuildAccessesForInst(Inst); if (Stmt->isRegionStmt()) BuildAccessesForInst(BB.getTerminator()); } else { for (Instruction &Inst : BB) { if (isIgnoredIntrinsic(&Inst)) continue; // Invariant loads already have been processed. if (isa(Inst) && RIL.count(cast(&Inst))) continue; BuildAccessesForInst(&Inst); } } } MemoryAccess *ScopBuilder::addMemoryAccess( ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, ArrayRef Subscripts, ArrayRef Sizes, MemoryKind Kind) { bool isKnownMustAccess = false; // Accesses in single-basic block statements are always executed. if (Stmt->isBlockStmt()) isKnownMustAccess = true; if (Stmt->isRegionStmt()) { // Accesses that dominate the exit block of a non-affine region are always // executed. In non-affine regions there may exist MemoryKind::Values that // do not dominate the exit. MemoryKind::Values will always dominate the // exit and MemoryKind::PHIs only if there is at most one PHI_WRITE in the // non-affine region. if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit())) isKnownMustAccess = true; } // Non-affine PHI writes do not "happen" at a particular instruction, but // after exiting the statement. 
Therefore they are guaranteed to execute and // overwrite the old value. if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI) isKnownMustAccess = true; if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) AccType = MemoryAccess::MAY_WRITE; auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, Affine, Subscripts, Sizes, AccessValue, Kind); scop->addAccessFunction(Access); Stmt->addAccess(Access); return Access; } void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, MemoryAccess::AccessType AccType, Value *BaseAddress, Type *ElementType, bool IsAffine, ArrayRef Subscripts, ArrayRef Sizes, Value *AccessValue) { ArrayBasePointers.insert(BaseAddress); auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, ElementType, IsAffine, AccessValue, Subscripts, Sizes, MemoryKind::Array); if (!DetectFortranArrays) return; if (Value *FAD = findFADAllocationInvisible(MemAccInst)) MemAccess->setFortranArrayDescriptor(FAD); else if (Value *FAD = findFADAllocationVisible(MemAccInst)) MemAccess->setFortranArrayDescriptor(FAD); } void ScopBuilder::ensureValueWrite(Instruction *Inst) { // Find the statement that defines the value of Inst. That statement has to // write the value to make it available to those statements that read it. ScopStmt *Stmt = scop->getStmtFor(Inst); // It is possible that the value is synthesizable within a loop (such that it // is not part of any statement), but not after the loop (where you need the // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will // avoid this. In case the IR has no such PHI, use the last statement (where // the value is synthesizable) to write the value. if (!Stmt) Stmt = scop->getLastStmtFor(Inst->getParent()); // Inst not defined within this SCoP. if (!Stmt) return; // Do not process further if the instruction is already written. if (Stmt->lookupValueWriteOf(Inst)) return; addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), true, Inst, ArrayRef(), ArrayRef(), MemoryKind::Value); } void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality // to be able to replace this one. Currently, there is a split responsibility. // In a first step, the MemoryAccess is created, but without the // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the // AccessRelation is created. At least for scalar accesses, there is no new // information available at ScopStmt::buildAccessRelations(), so we could // create the AccessRelation right away. This is what // ScopStmt::ensureValueRead(Value*) does. auto *Scope = UserStmt->getSurroundingLoop(); auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false); switch (VUse.getKind()) { case VirtualUse::Constant: case VirtualUse::Block: case VirtualUse::Synthesizable: case VirtualUse::Hoisted: case VirtualUse::Intra: // Uses of these kinds do not need a MemoryAccess. break; case VirtualUse::ReadOnly: // Add MemoryAccess for invariant values only if requested. if (!ModelReadOnlyScalars) break; LLVM_FALLTHROUGH; case VirtualUse::Inter: // Do not create another MemoryAccess for reloading the value if one already // exists. if (UserStmt->lookupValueReadOf(V)) break; addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), true, V, ArrayRef(), ArrayRef(), MemoryKind::Value); // Inter-statement uses need to write the value in their defining statement. 
if (VUse.isInter()) ensureValueWrite(cast(V)); break; } } void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt, BasicBlock *IncomingBlock, Value *IncomingValue, bool IsExitBlock) { // As the incoming block might turn out to be an error statement ensure we // will create an exit PHI SAI object. It is needed during code generation // and would be created later anyway. if (IsExitBlock) scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, MemoryKind::ExitPHI); // This is possible if PHI is in the SCoP's entry block. The incoming blocks // from outside the SCoP's region have no statement representation. if (!IncomingStmt) return; // Take care for the incoming value being available in the incoming block. // This must be done before the check for multiple PHI writes because multiple // exiting edges from subregion each can be the effective written value of the // subregion. As such, all of them must be made available in the subregion // statement. ensureValueRead(IncomingValue, IncomingStmt); // Do not add more than one MemoryAccess per PHINode and ScopStmt. if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) { assert(Acc->getAccessInstruction() == PHI); Acc->addIncoming(IncomingBlock, IncomingValue); return; } MemoryAccess *Acc = addMemoryAccess( IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, PHI, ArrayRef(), ArrayRef(), IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); assert(Acc); Acc->addIncoming(IncomingBlock, IncomingValue); } void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, PHI, ArrayRef(), ArrayRef(), MemoryKind::PHI); } void ScopBuilder::buildDomain(ScopStmt &Stmt) { isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt); Stmt.Domain = scop->getDomainConditions(&Stmt); Stmt.Domain = Stmt.Domain.set_tuple_id(Id); } void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) { isl::set Domain = Stmt.getDomain(); BasicBlock *BB = Stmt.getEntryBlock(); Loop *L = LI.getLoopFor(BB); while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L)) L = L->getParentLoop(); SmallVector Loops; while (L && Stmt.getParent()->getRegion().contains(L)) { Loops.push_back(L); L = L->getParentLoop(); } Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend()); } /// Return the reduction type for a given binary operator. static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, const Instruction *Load) { if (!BinOp) return MemoryAccess::RT_NONE; switch (BinOp->getOpcode()) { case Instruction::FAdd: if (!BinOp->isFast()) return MemoryAccess::RT_NONE; // Fall through case Instruction::Add: return MemoryAccess::RT_ADD; case Instruction::Or: return MemoryAccess::RT_BOR; case Instruction::Xor: return MemoryAccess::RT_BXOR; case Instruction::And: return MemoryAccess::RT_BAND; case Instruction::FMul: if (!BinOp->isFast()) return MemoryAccess::RT_NONE; // Fall through case Instruction::Mul: if (DisableMultiplicativeReductions) return MemoryAccess::RT_NONE; return MemoryAccess::RT_MUL; default: return MemoryAccess::RT_NONE; } } void ScopBuilder::checkForReductions(ScopStmt &Stmt) { SmallVector Loads; SmallVector, 4> Candidates; // First collect candidate load-store reduction chains by iterating over all // stores and collecting possible reduction loads. 
for (MemoryAccess *StoreMA : Stmt) { if (StoreMA->isRead()) continue; Loads.clear(); collectCandidateReductionLoads(StoreMA, Loads); for (MemoryAccess *LoadMA : Loads) Candidates.push_back(std::make_pair(LoadMA, StoreMA)); } // Then check each possible candidate pair. for (const auto &CandidatePair : Candidates) { bool Valid = true; isl::map LoadAccs = CandidatePair.first->getAccessRelation(); isl::map StoreAccs = CandidatePair.second->getAccessRelation(); // Skip those with obviously unequal base addresses. if (!LoadAccs.has_equal_space(StoreAccs)) { continue; } // And check if the remaining for overlap with other memory accesses. isl::map AllAccsRel = LoadAccs.unite(StoreAccs); AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain()); isl::set AllAccs = AllAccsRel.range(); for (MemoryAccess *MA : Stmt) { if (MA == CandidatePair.first || MA == CandidatePair.second) continue; isl::map AccRel = MA->getAccessRelation().intersect_domain(Stmt.getDomain()); isl::set Accs = AccRel.range(); if (AllAccs.has_equal_space(Accs)) { isl::set OverlapAccs = Accs.intersect(AllAccs); Valid = Valid && OverlapAccs.is_empty(); } } if (!Valid) continue; const LoadInst *Load = dyn_cast(CandidatePair.first->getAccessInstruction()); MemoryAccess::ReductionType RT = getReductionType(dyn_cast(Load->user_back()), Load); // If no overlapping access was found we mark the load and store as // reduction like. CandidatePair.first->markAsReductionLike(RT); CandidatePair.second->markAsReductionLike(RT); } } void ScopBuilder::collectCandidateReductionLoads( MemoryAccess *StoreMA, SmallVectorImpl &Loads) { ScopStmt *Stmt = StoreMA->getStatement(); auto *Store = dyn_cast(StoreMA->getAccessInstruction()); if (!Store) return; // Skip if there is not one binary operator between the load and the store auto *BinOp = dyn_cast(Store->getValueOperand()); if (!BinOp) return; // Skip if the binary operators has multiple uses if (BinOp->getNumUses() != 1) return; // Skip if the opcode of the binary operator is not commutative/associative if (!BinOp->isCommutative() || !BinOp->isAssociative()) return; // Skip if the binary operator is outside the current SCoP if (BinOp->getParent() != Store->getParent()) return; // Skip if it is a multiplicative reduction and we disabled them if (DisableMultiplicativeReductions && (BinOp->getOpcode() == Instruction::Mul || BinOp->getOpcode() == Instruction::FMul)) return; // Check the binary operator operands for a candidate load auto *PossibleLoad0 = dyn_cast(BinOp->getOperand(0)); auto *PossibleLoad1 = dyn_cast(BinOp->getOperand(1)); if (!PossibleLoad0 && !PossibleLoad1) return; // A load is only a candidate if it cannot escape (thus has only this use) if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) if (PossibleLoad0->getParent() == Store->getParent()) Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0)); if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) if (PossibleLoad1->getParent() == Store->getParent()) Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1)); } void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { for (MemoryAccess *Access : Stmt.MemAccs) { Type *ElementType = Access->getElementType(); MemoryKind Ty; if (Access->isPHIKind()) Ty = MemoryKind::PHI; else if (Access->isExitPHIKind()) Ty = MemoryKind::ExitPHI; else if (Access->isValueKind()) Ty = MemoryKind::Value; else Ty = MemoryKind::Array; auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), ElementType, Access->Sizes, Ty); Access->buildAccessRelation(SAI); scop->addAccessData(Access); } } 
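// Illustration of the pattern recognized by collectCandidateReductionLoads and
// checkForReductions above: a single commutative, associative binary operator
// whose only user is a store, fed by a load of the same array element, e.g.
// (hypothetical IR shown only for illustration, not taken from a test case):
//
//   %val = load double, double* %elt
//   %red = fadd fast double %val, %x
//   store double %red, double* %elt
//
// The load/store pair is only marked as reduction-like if no other memory
// access of the same statement may overlap the accessed location.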
#ifndef NDEBUG static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) { auto PhysUse = VirtualUse::create(S, Op, &LI, false); auto VirtUse = VirtualUse::create(S, Op, &LI, true); assert(PhysUse.getKind() == VirtUse.getKind()); } /// Check the consistency of every statement's MemoryAccesses. /// /// The check is carried out by expecting the "physical" kind of use (derived /// from the BasicBlocks instructions resides in) to be same as the "virtual" /// kind of use (derived from a statement's MemoryAccess). /// /// The "physical" uses are taken by ensureValueRead to determine whether to /// create MemoryAccesses. When done, the kind of scalar access should be the /// same no matter which way it was derived. /// /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence /// can intentionally influence on the kind of uses (not corresponding to the /// "physical" anymore, hence called "virtual"). The CodeGenerator therefore has /// to pick up the virtual uses. But here in the code generator, this has not /// happened yet, such that virtual and physical uses are equivalent. static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) { for (auto *BB : S->getRegion().blocks()) { for (auto &Inst : *BB) { auto *Stmt = S->getStmtFor(&Inst); if (!Stmt) continue; if (isIgnoredIntrinsic(&Inst)) continue; // Branch conditions are encoded in the statement domains. if (isa(&Inst) && Stmt->isBlockStmt()) continue; // Verify all uses. for (auto &Op : Inst.operands()) verifyUse(S, Op, LI); // Stores do not produce values used by other statements. if (isa(Inst)) continue; // For every value defined in the block, also check that a use of that // value in the same statement would not be an inter-statement use. It can // still be synthesizable or load-hoisted, but these kind of instructions // are not directly copied in code-generation. auto VirtDef = VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true); assert(VirtDef.getKind() == VirtualUse::Synthesizable || VirtDef.getKind() == VirtualUse::Intra || VirtDef.getKind() == VirtualUse::Hoisted); } } if (S->hasSingleExitEdge()) return; // PHINodes in the SCoP region's exit block are also uses to be checked. if (!S->getRegion().isTopLevelRegion()) { for (auto &Inst : *S->getRegion().getExit()) { if (!isa(Inst)) break; for (auto &Op : Inst.operands()) verifyUse(S, Op, LI); } } } #endif /// Return the block that is the representing block for @p RN. static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) { return RN->isSubRegion() ? RN->getNodeAs()->getEntry() : RN->getNodeAs(); } void ScopBuilder::buildScop(Region &R, AssumptionCache &AC, OptimizationRemarkEmitter &ORE) { scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE)); buildStmts(R); // Create all invariant load instructions first. These are categorized as // 'synthesizable', therefore are not part of any ScopStmt but need to be // created somewhere. const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); for (BasicBlock *BB : scop->getRegion().blocks()) { if (isErrorBlock(*BB, scop->getRegion(), LI, DT)) continue; for (Instruction &Inst : *BB) { LoadInst *Load = dyn_cast(&Inst); if (!Load) continue; if (!RIL.count(Load)) continue; // Invariant loads require a MemoryAccess to be created in some statement. // It is not important to which statement the MemoryAccess is added // because it will later be removed from the ScopStmt again. We chose the // first statement of the basic block the LoadInst is in. 
ArrayRef List = scop->getStmtListFor(BB); assert(!List.empty()); ScopStmt *RILStmt = List.front(); buildMemoryAccess(Load, RILStmt); } } buildAccessFunctions(); // In case the region does not have an exiting block we will later (during // code generation) split the exit block. This will move potential PHI nodes // from the current exit block into the new region exiting block. Hence, PHI // nodes that are at this point not part of the region will be. // To handle these PHI nodes later we will now model their operands as scalar // accesses. Note that we do not model anything in the exit block if we have // an exiting block in the region, as there will not be any splitting later. if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) { for (Instruction &Inst : *R.getExit()) { PHINode *PHI = dyn_cast(&Inst); if (!PHI) break; buildPHIAccesses(nullptr, PHI, nullptr, true); } } // Create memory accesses for global reads since all arrays are now known. auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0); for (auto GlobalReadPair : GlobalReads) { ScopStmt *GlobalReadStmt = GlobalReadPair.first; Instruction *GlobalRead = GlobalReadPair.second; for (auto *BP : ArrayBasePointers) addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); } scop->buildInvariantEquivalenceClasses(); /// A map from basic blocks to their invalid domains. DenseMap InvalidDomainMap; if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) { LLVM_DEBUG( dbgs() << "Bailing-out because buildDomains encountered problems\n"); return; } scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap); // Initialize the invalid domain. for (ScopStmt &Stmt : scop->Stmts) if (Stmt.isBlockStmt()) Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]); else Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock( Stmt.getRegion()->getNode())]); // Remove empty statements. // Exit early in case there are no executable statements left in this scop. scop->removeStmtNotInDomainMap(); scop->simplifySCoP(false); if (scop->isEmpty()) { LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n"); return; } // The ScopStmts now have enough information to initialize themselves. for (ScopStmt &Stmt : *scop) { collectSurroundingLoops(Stmt); buildDomain(Stmt); buildAccessRelations(Stmt); if (DetectReductions) checkForReductions(Stmt); } // Check early for a feasible runtime context. if (!scop->hasFeasibleRuntimeContext()) { LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n"); return; } // Check early for profitability. Afterwards it cannot change anymore, // only the runtime context could become infeasible. if (!scop->isProfitable(UnprofitableScalarAccs)) { scop->invalidate(PROFITABLE, DebugLoc()); LLVM_DEBUG( dbgs() << "Bailing-out because SCoP is not considered profitable\n"); return; } scop->buildSchedule(LI); scop->finalizeAccesses(); scop->realignParams(); scop->addUserContext(); // After the context was fully constructed, thus all our knowledge about // the parameters is in there, we add all recorded assumptions to the // assumed/invalid context. 
scop->addRecordedAssumptions(); scop->simplifyContexts(); if (!scop->buildAliasChecks(AA)) { LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); return; } scop->hoistInvariantLoads(); scop->canonicalizeDynamicBasePtrs(); scop->verifyInvariantLoads(); scop->simplifySCoP(true); // Check late for a feasible runtime context because profitability did not // change. if (!scop->hasFeasibleRuntimeContext()) { LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n"); return; } #ifndef NDEBUG verifyUses(scop.get(), LI, DT); #endif } ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA, const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, ScopDetection &SD, ScalarEvolution &SE, OptimizationRemarkEmitter &ORE) : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) { DebugLoc Beg, End; auto P = getBBPairForRegion(R); getDebugLocations(P, Beg, End); std::string Msg = "SCoP begins here."; ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) << Msg); buildScop(*R, AC, ORE); LLVM_DEBUG(dbgs() << *scop); if (!scop->hasFeasibleRuntimeContext()) { InfeasibleScops++; Msg = "SCoP ends here but was dismissed."; LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n"); scop.reset(); } else { Msg = "SCoP ends here."; ++ScopFound; if (scop->getMaxLoopDepth() > 0) ++RichScopFound; } if (R->isTopLevelRegion()) ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first) << Msg); else ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second) << Msg); }