//===- ScopInfo.cpp -------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Create a polyhedral description for a static control flow region.
//
// The pass creates a polyhedral description of the SCoPs detected by the Scop
// detection, derived from their LLVM-IR code.
//
// This representation is shared among several tools in the polyhedral
// community, e.g. CLooG, Pluto, LooPo, and Graphite.
//
//===----------------------------------------------------------------------===//

#include "polly/ScopInfo.h"
#include "polly/LinkAllPasses.h"
#include "polly/Options.h"
#include "polly/ScopBuilder.h"
#include "polly/ScopDetection.h"
#include "polly/Support/GICHelper.h"
#include "polly/Support/ISLOStream.h"
#include "polly/Support/ISLTools.h"
#include "polly/Support/SCEVAffinator.h"
#include "polly/Support/SCEVValidator.h"
#include "polly/Support/ScopHelper.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "isl/aff.h"
#include "isl/constraint.h"
#include "isl/local_space.h"
#include "isl/map.h"
#include "isl/options.h"
#include "isl/printer.h"
#include "isl/schedule.h"
#include "isl/schedule_node.h"
#include "isl/set.h"
#include "isl/union_map.h"
#include "isl/union_set.h"
#include "isl/val.h"
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;
using namespace polly;

#define DEBUG_TYPE "polly-scops"

STATISTIC(AssumptionsAliasing, "Number of aliasing assumptions taken.");
STATISTIC(AssumptionsInbounds, "Number of inbounds assumptions taken.");
STATISTIC(AssumptionsWrapping,
"Number of wrapping assumptions taken."); STATISTIC(AssumptionsUnsigned, "Number of unsigned assumptions taken."); STATISTIC(AssumptionsComplexity, "Number of too complex SCoPs."); STATISTIC(AssumptionsUnprofitable, "Number of unprofitable SCoPs."); STATISTIC(AssumptionsErrorBlock, "Number of error block assumptions taken."); STATISTIC(AssumptionsInfiniteLoop, "Number of bounded loop assumptions taken."); STATISTIC(AssumptionsInvariantLoad, "Number of invariant loads assumptions taken."); STATISTIC(AssumptionsDelinearization, "Number of delinearization assumptions taken."); STATISTIC(NumScops, "Number of feasible SCoPs after ScopInfo"); STATISTIC(NumLoopsInScop, "Number of loops in scops"); STATISTIC(NumBoxedLoops, "Number of boxed loops in SCoPs after ScopInfo"); STATISTIC(NumAffineLoops, "Number of affine loops in SCoPs after ScopInfo"); STATISTIC(NumScopsDepthZero, "Number of scops with maximal loop depth 0"); STATISTIC(NumScopsDepthOne, "Number of scops with maximal loop depth 1"); STATISTIC(NumScopsDepthTwo, "Number of scops with maximal loop depth 2"); STATISTIC(NumScopsDepthThree, "Number of scops with maximal loop depth 3"); STATISTIC(NumScopsDepthFour, "Number of scops with maximal loop depth 4"); STATISTIC(NumScopsDepthFive, "Number of scops with maximal loop depth 5"); STATISTIC(NumScopsDepthLarger, "Number of scops with maximal loop depth 6 and larger"); STATISTIC(MaxNumLoopsInScop, "Maximal number of loops in scops"); STATISTIC(NumValueWrites, "Number of scalar value writes after ScopInfo"); STATISTIC( NumValueWritesInLoops, "Number of scalar value writes nested in affine loops after ScopInfo"); STATISTIC(NumPHIWrites, "Number of scalar phi writes after ScopInfo"); STATISTIC(NumPHIWritesInLoops, "Number of scalar phi writes nested in affine loops after ScopInfo"); STATISTIC(NumSingletonWrites, "Number of singleton writes after ScopInfo"); STATISTIC(NumSingletonWritesInLoops, "Number of singleton writes nested in affine loops after ScopInfo"); // The maximal number of basic sets we allow during domain construction to // be created. More complex scops will result in very high compile time and // are also unlikely to result in good code static int const MaxDisjunctsInDomain = 20; // The number of disjunct in the context after which we stop to add more // disjuncts. This parameter is there to avoid exponential growth in the // number of disjunct when adding non-convex sets to the context. static int const MaxDisjunctsInContext = 4; // The maximal number of dimensions we allow during invariant load construction. // More complex access ranges will result in very high compile time and are also // unlikely to result in good code. This value is very high and should only // trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006). 
static int const MaxDimensionsInAccessRange = 9;

static cl::opt<int> OptComputeOut(
    "polly-analysis-computeout",
    cl::desc("Bound the scop analysis by a maximal amount of "
             "computational steps (0 means no bound)"),
    cl::Hidden, cl::init(800000), cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyRemarksMinimal(
    "polly-remarks-minimal",
    cl::desc("Do not emit remarks about assumptions that are known"),
    cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxAccessDisjuncts(
    "polly-rtc-max-array-disjuncts",
    cl::desc("The maximal number of disjuncts allowed in memory accesses "
             "to build RTCs."),
    cl::Hidden, cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxParameters(
    "polly-rtc-max-parameters",
    cl::desc("The maximal number of parameters allowed in RTCs."), cl::Hidden,
    cl::ZeroOrMore, cl::init(8), cl::cat(PollyCategory));

static cl::opt<int> RunTimeChecksMaxArraysPerGroup(
    "polly-rtc-max-arrays-per-group",
    cl::desc("The maximal number of arrays to compare in each alias group."),
    cl::Hidden, cl::ZeroOrMore, cl::init(20), cl::cat(PollyCategory));

static cl::opt<std::string> UserContextStr(
    "polly-context", cl::value_desc("isl parameter set"),
    cl::desc("Provide additional constraints on the context parameters"),
    cl::init(""), cl::cat(PollyCategory));

static cl::opt<bool>
    IslOnErrorAbort("polly-on-isl-error-abort",
                    cl::desc("Abort if an isl error is encountered"),
                    cl::init(true), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseInbounds(
    "polly-precise-inbounds",
    cl::desc("Take more precise inbounds assumptions (does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool>
    PollyIgnoreInbounds("polly-ignore-inbounds",
                        cl::desc("Do not take inbounds assumptions at all"),
                        cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyIgnoreParamBounds(
    "polly-ignore-parameter-bounds",
    cl::desc(
        "Do not add parameter bounds and do not gist-simplify sets "
        "accordingly"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyAllowDereferenceOfAllFunctionParams(
    "polly-allow-dereference-of-all-function-parameters",
    cl::desc(
        "Treat all parameters to functions that are pointers as "
        "dereferenceable. This is useful for invariant load hoisting, since"
        " we can generate fewer runtime checks. This is only valid if all"
        " pointers to functions are always initialized, so that Polly can"
        " choose to hoist their loads."),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

static cl::opt<bool> PollyPreciseFoldAccesses(
    "polly-precise-fold-accesses",
    cl::desc("Fold memory accesses to model more possible delinearizations "
             "(does not scale well)"),
    cl::Hidden, cl::init(false), cl::cat(PollyCategory));

bool polly::UseInstructionNames;

static cl::opt<bool, true> XUseInstructionNames(
    "polly-use-llvm-names",
    cl::desc("Use LLVM-IR names when deriving statement names"),
    cl::location(UseInstructionNames), cl::Hidden, cl::init(false),
    cl::ZeroOrMore, cl::cat(PollyCategory));

static cl::opt<bool> PollyPrintInstructions(
    "polly-print-instructions", cl::desc("Output instructions per ScopStmt"),
    cl::Hidden, cl::Optional, cl::init(false), cl::cat(PollyCategory));

//===----------------------------------------------------------------------===//

// Create a sequence of two schedules. Either argument may be null and is
// interpreted as the empty schedule. Can also return null if both schedules
// are empty.
static isl::schedule combineInSequence(isl::schedule Prev, isl::schedule Succ) {
  if (!Prev)
    return Succ;
  if (!Succ)
    return Prev;

  return Prev.sequence(Succ);
}

static isl::set addRangeBoundsToSet(isl::set S, const ConstantRange &Range,
                                    int dim, isl::dim type) {
  isl::val V;
  isl::ctx Ctx = S.get_ctx();

  // The upper and lower bound for a parameter value are derived either from
  // the data type of the parameter or from the - possibly more restrictive -
  // range metadata.
  V = valFromAPInt(Ctx.get(), Range.getSignedMin(), true);
  S = S.lower_bound_val(type, dim, V);
  V = valFromAPInt(Ctx.get(), Range.getSignedMax(), true);
  S = S.upper_bound_val(type, dim, V);

  if (Range.isFullSet())
    return S;

  if (S.n_basic_set() > MaxDisjunctsInContext)
    return S;

  // In case of signed wrapping, we can refine the set of valid values by
  // excluding the part not covered by the wrapping range.
  if (Range.isSignWrappedSet()) {
    V = valFromAPInt(Ctx.get(), Range.getLower(), true);
    isl::set SLB = S.lower_bound_val(type, dim, V);

    V = valFromAPInt(Ctx.get(), Range.getUpper(), true);
    V = V.sub_ui(1);
    isl::set SUB = S.upper_bound_val(type, dim, V);
    S = SLB.unite(SUB);
  }

  return S;
}
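// For illustration of the refinement above (hypothetical values, not derived
// from any particular input): consider a sign-wrapped i8 range [100, -100)
// for a parameter p. The signed min/max bounds alone only give
// -128 <= p <= 127; the wrapped-range refinement keeps exactly the values
// the range actually covers:
//
//   { [p] : 100 <= p <= 127 } union { [p] : -128 <= p <= -101 }
//
// where SLB collects the part at or above the (inclusive) lower bound and
// SUB the part below the (exclusive) upper bound.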
"__phi" : "", UseInstructionNames); Id = isl::id::alloc(Ctx, BasePtrName, this); updateSizes(Sizes); if (!BasePtr || Kind != MemoryKind::Array) { BasePtrOriginSAI = nullptr; return; } BasePtrOriginSAI = identifyBasePtrOriginSAI(S, BasePtr); if (BasePtrOriginSAI) const_cast(BasePtrOriginSAI)->addDerivedSAI(this); } ScopArrayInfo::~ScopArrayInfo() = default; isl::space ScopArrayInfo::getSpace() const { auto Space = isl::space(Id.get_ctx(), 0, getNumberOfDimensions()); Space = Space.set_tuple_id(isl::dim::set, Id); return Space; } bool ScopArrayInfo::isReadOnly() { isl::union_set WriteSet = S.getWrites().range(); isl::space Space = getSpace(); WriteSet = WriteSet.extract_set(Space); return bool(WriteSet.is_empty()); } bool ScopArrayInfo::isCompatibleWith(const ScopArrayInfo *Array) const { if (Array->getElementType() != getElementType()) return false; if (Array->getNumberOfDimensions() != getNumberOfDimensions()) return false; for (unsigned i = 0; i < getNumberOfDimensions(); i++) if (Array->getDimensionSize(i) != getDimensionSize(i)) return false; return true; } void ScopArrayInfo::updateElementType(Type *NewElementType) { if (NewElementType == ElementType) return; auto OldElementSize = DL.getTypeAllocSizeInBits(ElementType); auto NewElementSize = DL.getTypeAllocSizeInBits(NewElementType); if (NewElementSize == OldElementSize || NewElementSize == 0) return; if (NewElementSize % OldElementSize == 0 && NewElementSize < OldElementSize) { ElementType = NewElementType; } else { auto GCD = GreatestCommonDivisor64(NewElementSize, OldElementSize); ElementType = IntegerType::get(ElementType->getContext(), GCD); } } /// Make the ScopArrayInfo model a Fortran Array void ScopArrayInfo::applyAndSetFAD(Value *FAD) { assert(FAD && "got invalid Fortran array descriptor"); if (this->FAD) { assert(this->FAD == FAD && "receiving different array descriptors for same array"); return; } assert(DimensionSizesPw.size() > 0 && !DimensionSizesPw[0]); assert(!this->FAD); this->FAD = FAD; isl::space Space(S.getIslCtx(), 1, 0); std::string param_name = getName(); param_name += "_fortranarr_size"; isl::id IdPwAff = isl::id::alloc(S.getIslCtx(), param_name, this); Space = Space.set_dim_id(isl::dim::param, 0, IdPwAff); isl::pw_aff PwAff = isl::aff::var_on_domain(isl::local_space(Space), isl::dim::param, 0); DimensionSizesPw[0] = PwAff; } bool ScopArrayInfo::updateSizes(ArrayRef NewSizes, bool CheckConsistency) { int SharedDims = std::min(NewSizes.size(), DimensionSizes.size()); int ExtraDimsNew = NewSizes.size() - SharedDims; int ExtraDimsOld = DimensionSizes.size() - SharedDims; if (CheckConsistency) { for (int i = 0; i < SharedDims; i++) { auto *NewSize = NewSizes[i + ExtraDimsNew]; auto *KnownSize = DimensionSizes[i + ExtraDimsOld]; if (NewSize && KnownSize && NewSize != KnownSize) return false; } if (DimensionSizes.size() >= NewSizes.size()) return true; } DimensionSizes.clear(); DimensionSizes.insert(DimensionSizes.begin(), NewSizes.begin(), NewSizes.end()); DimensionSizesPw.clear(); for (const SCEV *Expr : DimensionSizes) { if (!Expr) { DimensionSizesPw.push_back(nullptr); continue; } isl::pw_aff Size = S.getPwAffOnly(Expr); DimensionSizesPw.push_back(Size); } return true; } std::string ScopArrayInfo::getName() const { return Id.get_name(); } int ScopArrayInfo::getElemSizeInBytes() const { return DL.getTypeAllocSize(ElementType); } isl::id ScopArrayInfo::getBasePtrId() const { return Id; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ScopArrayInfo::dump() const { print(errs()); } #endif 
void ScopArrayInfo::print(raw_ostream &OS, bool SizeAsPwAff) const {
  OS.indent(8) << *getElementType() << " " << getName();
  unsigned u = 0;
  // If this is a Fortran array, then we can print the outermost dimension
  // as an isl_pw_aff even though there is no SCEV information.
  bool IsOutermostSizeKnown = SizeAsPwAff && FAD;

  if (!IsOutermostSizeKnown && getNumberOfDimensions() > 0 &&
      !getDimensionSize(0)) {
    OS << "[*]";
    u++;
  }
  for (; u < getNumberOfDimensions(); u++) {
    OS << "[";

    if (SizeAsPwAff) {
      isl::pw_aff Size = getDimensionSizePw(u);
      OS << " " << Size << " ";
    } else {
      OS << *getDimensionSize(u);
    }

    OS << "]";
  }

  OS << ";";

  if (BasePtrOriginSAI)
    OS << " [BasePtrOrigin: " << BasePtrOriginSAI->getName() << "]";

  OS << " // Element size " << getElemSizeInBytes() << "\n";
}

const ScopArrayInfo *
ScopArrayInfo::getFromAccessFunction(isl::pw_multi_aff PMA) {
  isl::id Id = PMA.get_tuple_id(isl::dim::out);
  assert(!Id.is_null() && "Output dimension didn't have an ID");
  return getFromId(Id);
}

const ScopArrayInfo *ScopArrayInfo::getFromId(isl::id Id) {
  void *User = Id.get_user();
  const ScopArrayInfo *SAI = static_cast<const ScopArrayInfo *>(User);
  return SAI;
}
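// For intuition about wrapConstantDimensions below, a sketch (with an
// illustrative size, not from any particular input): for an array A[*][8], a
// delinearized access A[i][j] whose inner subscript may fall outside [0, 8)
// is rewritten so that the excess carries into the outer dimension:
//
//   { A[i][j] -> A[i + floor(j / 8)][j mod 8] }
//
// Each iteration of the loop handles one dimension, carrying from the
// innermost dimension outwards.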
void MemoryAccess::wrapConstantDimensions() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::ctx Ctx = ArraySpace.get_ctx();
  unsigned DimsArray = SAI->getNumberOfDimensions();

  isl::multi_aff DivModAff = isl::multi_aff::identity(
      ArraySpace.map_from_domain_and_range(ArraySpace));
  isl::local_space LArraySpace = isl::local_space(ArraySpace);

  // Begin with the last dimension, to iteratively carry into higher
  // dimensions.
  for (int i = DimsArray - 1; i > 0; i--) {
    auto *DimSize = SAI->getDimensionSize(i);
    auto *DimSizeCst = dyn_cast<SCEVConstant>(DimSize);

    // This transformation is not applicable to dimensions with dynamic size.
    if (!DimSizeCst)
      continue;

    // This transformation is not applicable to dimensions of size zero.
    if (DimSize->isZero())
      continue;

    isl::val DimSizeVal =
        valFromAPInt(Ctx.get(), DimSizeCst->getAPInt(), false);
    isl::aff Var = isl::aff::var_on_domain(LArraySpace, isl::dim::set, i);
    isl::aff PrevVar =
        isl::aff::var_on_domain(LArraySpace, isl::dim::set, i - 1);

    // Compute: index % size
    // Modulo must apply in the divide of the previous iteration, if any.
    isl::aff Modulo = Var.mod(DimSizeVal);
    Modulo = Modulo.pullback(DivModAff);

    // Compute: floor(index / size)
    isl::aff Divide = Var.div(isl::aff(LArraySpace, DimSizeVal));
    Divide = Divide.floor();
    Divide = Divide.add(PrevVar);
    Divide = Divide.pullback(DivModAff);

    // Apply Modulo and Divide.
    DivModAff = DivModAff.set_aff(i, Modulo);
    DivModAff = DivModAff.set_aff(i - 1, Divide);
  }

  // Apply all modulo/divides on the accesses.
  isl::map Relation = AccessRelation;
  Relation = Relation.apply_range(isl::map::from_multi_aff(DivModAff));
  Relation = Relation.detect_equalities();
  AccessRelation = Relation;
}

void MemoryAccess::updateDimensionality() {
  auto *SAI = getScopArrayInfo();
  isl::space ArraySpace = SAI->getSpace();
  isl::space AccessSpace = AccessRelation.get_space().range();
  isl::ctx Ctx = ArraySpace.get_ctx();

  auto DimsArray = ArraySpace.dim(isl::dim::set);
  auto DimsAccess = AccessSpace.dim(isl::dim::set);
  auto DimsMissing = DimsArray - DimsAccess;

  auto *BB = getStatement()->getEntryBlock();
  auto &DL = BB->getModule()->getDataLayout();
  unsigned ArrayElemSize = SAI->getElemSizeInBytes();
  unsigned ElemBytes = DL.getTypeAllocSize(getElementType());

  isl::map Map = isl::map::from_domain_and_range(
      isl::set::universe(AccessSpace), isl::set::universe(ArraySpace));

  for (unsigned i = 0; i < DimsMissing; i++)
    Map = Map.fix_si(isl::dim::out, i, 0);

  for (unsigned i = DimsMissing; i < DimsArray; i++)
    Map = Map.equate(isl::dim::in, i - DimsMissing, isl::dim::out, i);

  AccessRelation = AccessRelation.apply_range(Map);

  // For the non-delinearized arrays, divide the access function of the last
  // subscript by the size of the elements in the array.
  //
  // A stride-one array access in C expressed as A[i] is expressed in
  // LLVM-IR as something like A[i * elementsize]. This hides the fact that
  // two subsequent values of 'i' index two values that are stored next to
  // each other in memory. By this division we make this characteristic
  // obvious again. If the base pointer was accessed with offsets not
  // divisible by the access's element size, we will have chosen a smaller
  // ArrayElemSize that divides the offsets of all accesses to this base
  // pointer.
  if (DimsAccess == 1) {
    isl::val V = isl::val(Ctx, ArrayElemSize);
    AccessRelation = AccessRelation.floordiv_val(V);
  }

  // We currently do this only if we added at least one dimension, which means
  // some dimension's indices have not been specified, an indicator that some
  // index values have been added together.
  // TODO: Investigate general usefulness; effect on unit tests is to make
  // index expressions more complicated.
  if (DimsMissing)
    wrapConstantDimensions();

  if (!isAffine())
    computeBoundsOnAccessRelation(ArrayElemSize);

  // Introduce multi-element accesses in case the type loaded by this memory
  // access is larger than the canonical element type of the array.
  //
  // An access ((float *)A)[i] to an array char *A is modeled as
  // {[i] -> A[o] : 4 i <= o <= 4 i + 3 }
  if (ElemBytes > ArrayElemSize) {
    assert(ElemBytes % ArrayElemSize == 0 &&
           "Loaded element size should be multiple of canonical element size");
    isl::map Map = isl::map::from_domain_and_range(
        isl::set::universe(ArraySpace), isl::set::universe(ArraySpace));
    for (unsigned i = 0; i < DimsArray - 1; i++)
      Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

    isl::constraint C;
    isl::local_space LS;

    LS = isl::local_space(Map.get_space());
    int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes();

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_constant_val(isl::val(Ctx, Num - 1));
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1);
    Map = Map.add_constraint(C);

    C = isl::constraint::alloc_inequality(LS);
    C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1);
    C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1);
    C = C.set_constant_val(isl::val(Ctx, 0));
    Map = Map.add_constraint(C);
    AccessRelation = AccessRelation.apply_range(Map);
  }
}
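// A sketch of the single-subscript division in updateDimensionality above
// (illustrative numbers): for a 4-byte element type, LLVM-IR encodes A[i] as
// a byte offset, so the initial relation is
//
//   { S[i] -> MemRef_A[4i] }
//
// and the floordiv by ArrayElemSize = 4 recovers the element-wise view
//
//   { S[i] -> MemRef_A[i] }.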
const std::string
MemoryAccess::getReductionOperatorStr(MemoryAccess::ReductionType RT) {
  switch (RT) {
  case MemoryAccess::RT_NONE:
    llvm_unreachable("Requested a reduction operator string for a memory "
                     "access which isn't a reduction");
  case MemoryAccess::RT_ADD:
    return "+";
  case MemoryAccess::RT_MUL:
    return "*";
  case MemoryAccess::RT_BOR:
    return "|";
  case MemoryAccess::RT_BXOR:
    return "^";
  case MemoryAccess::RT_BAND:
    return "&";
  }
  llvm_unreachable("Unknown reduction type");
}

const ScopArrayInfo *MemoryAccess::getOriginalScopArrayInfo() const {
  isl::id ArrayId = getArrayId();
  void *User = ArrayId.get_user();
  const ScopArrayInfo *SAI = static_cast<const ScopArrayInfo *>(User);
  return SAI;
}

const ScopArrayInfo *MemoryAccess::getLatestScopArrayInfo() const {
  isl::id ArrayId = getLatestArrayId();
  void *User = ArrayId.get_user();
  const ScopArrayInfo *SAI = static_cast<const ScopArrayInfo *>(User);
  return SAI;
}

isl::id MemoryAccess::getOriginalArrayId() const {
  return AccessRelation.get_tuple_id(isl::dim::out);
}

isl::id MemoryAccess::getLatestArrayId() const {
  if (!hasNewAccessRelation())
    return getOriginalArrayId();
  return NewAccessRelation.get_tuple_id(isl::dim::out);
}

isl::map MemoryAccess::getAddressFunction() const {
  return getAccessRelation().lexmin();
}

isl::pw_multi_aff
MemoryAccess::applyScheduleToAccessRelation(isl::union_map USchedule) const {
  isl::map Schedule, ScheduledAccRel;
  isl::union_set UDomain;

  UDomain = getStatement()->getDomain();
  USchedule = USchedule.intersect_domain(UDomain);
  Schedule = isl::map::from_union_map(USchedule);
  ScheduledAccRel = getAddressFunction().apply_domain(Schedule);
  return isl::pw_multi_aff::from_map(ScheduledAccRel);
}

isl::map MemoryAccess::getOriginalAccessRelation() const {
  return AccessRelation;
}

std::string MemoryAccess::getOriginalAccessRelationStr() const {
  return AccessRelation.to_str();
}

isl::space MemoryAccess::getOriginalAccessRelationSpace() const {
  return AccessRelation.get_space();
}

isl::map MemoryAccess::getNewAccessRelation() const {
  return NewAccessRelation;
}

std::string MemoryAccess::getNewAccessRelationStr() const {
  return NewAccessRelation.to_str();
}

std::string MemoryAccess::getAccessRelationStr() const {
  return getAccessRelation().to_str();
}

isl::basic_map MemoryAccess::createBasicAccessMap(ScopStmt *Statement) {
  isl::space Space = isl::space(Statement->getIslCtx(), 0, 1);
  Space = Space.align_params(Statement->getDomainSpace());

  return isl::basic_map::from_domain_and_range(
      isl::basic_set::universe(Statement->getDomainSpace()),
      isl::basic_set::universe(Space));
}

// Formalize the no-out-of-bound-access assumption.
//
// When delinearizing array accesses we optimistically assume that the
// delinearized accesses do not access out-of-bound locations (the subscript
// expression of each array dimension evaluates, for each statement instance
// that is executed, to a value that is greater than or equal to zero and
// strictly smaller than the size of the corresponding dimension). The only
// exception is the outermost dimension, for which we do not need to assume
// any upper bound. At this point we formalize this assumption to ensure that
// at code generation time the relevant run-time checks can be generated.
//
// To find the set of constraints necessary to avoid out-of-bound accesses, we
// first build the set of data locations that are not within array bounds. We
// then apply the reverse access relation to obtain the set of iterations that
// may contain invalid accesses and reduce this set of iterations to the ones
// that are actually executed by intersecting them with the domain of the
// statement. If we now project out all loop dimensions, we obtain a set of
// parameters that may cause statement instances to be executed that may
// possibly yield out-of-bound memory accesses. The complement of these
// constraints is the set of constraints that needs to be assumed to ensure
// such statement instances are never executed.
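// As a concrete (illustrative) instance: for an access
// { S[i,j] -> A[i][j] } to an array A[*][m] with statement domain
// { S[i,j] : 0 <= i < n and 0 <= j < n }, the out-of-bound locations in the
// inner dimension are { A[a0][a1] : a1 < 0 or a1 >= m }. Pulling these back
// through the access relation and intersecting with the domain leaves the
// parameter combinations { [n, m] : n > m } under which such accesses can
// occur; the assumption recorded below is the complement, { [n, m] : n <= m }.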
void MemoryAccess::assumeNoOutOfBound() {
  if (PollyIgnoreInbounds)
    return;
  auto *SAI = getScopArrayInfo();
  isl::space Space = getOriginalAccessRelationSpace().range();
  isl::set Outside = isl::set::empty(Space);
  for (int i = 1, Size = Space.dim(isl::dim::set); i < Size; ++i) {
    isl::local_space LS(Space);
    isl::pw_aff Var = isl::pw_aff::var_on_domain(LS, isl::dim::set, i);
    isl::pw_aff Zero = isl::pw_aff(LS);

    isl::set DimOutside = Var.lt_set(Zero);
    isl::pw_aff SizeE = SAI->getDimensionSizePw(i);
    SizeE = SizeE.add_dims(isl::dim::in, Space.dim(isl::dim::set));
    SizeE = SizeE.set_tuple_id(isl::dim::in, Space.get_tuple_id(isl::dim::set));
    DimOutside = DimOutside.unite(SizeE.le_set(Var));

    Outside = Outside.unite(DimOutside);
  }

  Outside = Outside.apply(getAccessRelation().reverse());
  Outside = Outside.intersect(Statement->getDomain());
  Outside = Outside.params();

  // Remove divs to avoid the construction of overly complicated assumptions.
  // Doing so increases the set of parameter combinations that are assumed to
  // not appear. This is always safe, but may make the resulting run-time
  // check bail out more often than strictly necessary.
  Outside = Outside.remove_divs();
  Outside = Outside.complement();

  const auto &Loc = getAccessInstruction()
                        ? getAccessInstruction()->getDebugLoc()
                        : DebugLoc();
  if (!PollyPreciseInbounds)
    Outside = Outside.gist_params(Statement->getDomain().params());
  Statement->getParent()->recordAssumption(INBOUNDS, Outside, Loc,
                                           AS_ASSUMPTION);
}

void MemoryAccess::buildMemIntrinsicAccessRelation() {
  assert(isMemoryIntrinsic());
  assert(Subscripts.size() == 2 && Sizes.size() == 1);

  isl::pw_aff SubscriptPWA = getPwAff(Subscripts[0]);
  isl::map SubscriptMap = isl::map::from_pw_aff(SubscriptPWA);

  isl::map LengthMap;
  if (Subscripts[1] == nullptr) {
    LengthMap = isl::map::universe(SubscriptMap.get_space());
  } else {
    isl::pw_aff LengthPWA = getPwAff(Subscripts[1]);
    LengthMap = isl::map::from_pw_aff(LengthPWA);
    isl::space RangeSpace = LengthMap.get_space().range();
    LengthMap = LengthMap.apply_range(isl::map::lex_gt(RangeSpace));
  }
  LengthMap = LengthMap.lower_bound_si(isl::dim::out, 0, 0);
  LengthMap = LengthMap.align_params(SubscriptMap.get_space());
  SubscriptMap = SubscriptMap.align_params(LengthMap.get_space());
  LengthMap = LengthMap.sum(SubscriptMap);
  AccessRelation =
      LengthMap.set_tuple_id(isl::dim::in, getStatement()->getDomainId());
}

void MemoryAccess::computeBoundsOnAccessRelation(unsigned ElementSize) {
  ScalarEvolution *SE = Statement->getParent()->getSE();

  auto MAI = MemAccInst(getAccessInstruction());
  if (isa<MemIntrinsic>(MAI))
    return;

  Value *Ptr = MAI.getPointerOperand();
  if (!Ptr || !SE->isSCEVable(Ptr->getType()))
    return;

  auto *PtrSCEV = SE->getSCEV(Ptr);
  if (isa<SCEVCouldNotCompute>(PtrSCEV))
    return;

  auto *BasePtrSCEV = SE->getPointerBase(PtrSCEV);
  if (BasePtrSCEV && !isa<SCEVCouldNotCompute>(BasePtrSCEV))
    PtrSCEV = SE->getMinusSCEV(PtrSCEV, BasePtrSCEV);

  const ConstantRange &Range = SE->getSignedRange(PtrSCEV);
  if (Range.isFullSet())
    return;

  if (Range.isWrappedSet() || Range.isSignWrappedSet())
    return;

  bool isWrapping = Range.isSignWrappedSet();

  unsigned BW = Range.getBitWidth();
  const auto One = APInt(BW, 1);
  const auto LB = isWrapping ? Range.getLower() : Range.getSignedMin();
  const auto UB = isWrapping ? (Range.getUpper() - One) : Range.getSignedMax();

  auto Min = LB.sdiv(APInt(BW, ElementSize));
  auto Max = UB.sdiv(APInt(BW, ElementSize)) + One;
  assert(Min.sle(Max) && "Minimum expected to be less than or equal to max");

  isl::map Relation = AccessRelation;
  isl::set AccessRange = Relation.range();
  AccessRange = addRangeBoundsToSet(AccessRange, ConstantRange(Min, Max), 0,
                                    isl::dim::set);
  AccessRelation = Relation.intersect_range(AccessRange);
}

void MemoryAccess::foldAccessRelation() {
  if (Sizes.size() < 2 || isa<SCEVConstant>(Sizes[1]))
    return;

  int Size = Subscripts.size();

  isl::map NewAccessRelation = AccessRelation;

  for (int i = Size - 2; i >= 0; --i) {
    isl::space Space;
    isl::map MapOne, MapTwo;
    isl::pw_aff DimSize = getPwAff(Sizes[i + 1]);

    isl::space SpaceSize = DimSize.get_space();
    isl::id ParamId = SpaceSize.get_dim_id(isl::dim::param, 0);

    Space = AccessRelation.get_space();
    Space = Space.range().map_from_set();
    Space = Space.align_params(SpaceSize);

    int ParamLocation = Space.find_dim_by_id(isl::dim::param, ParamId);

    MapOne = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      MapOne = MapOne.equate(isl::dim::in, j, isl::dim::out, j);
    MapOne = MapOne.lower_bound_si(isl::dim::in, i + 1, 0);

    MapTwo = isl::map::universe(Space);
    for (int j = 0; j < Size; ++j)
      if (j < i || j > i + 1)
        MapTwo = MapTwo.equate(isl::dim::in, j, isl::dim::out, j);

    isl::local_space LS(Space);
    isl::constraint C;
    C = isl::constraint::alloc_equality(LS);
    C = C.set_constant_si(-1);
    C = C.set_coefficient_si(isl::dim::in, i, 1);
    C = C.set_coefficient_si(isl::dim::out, i, -1);
    MapTwo = MapTwo.add_constraint(C);
    C = isl::constraint::alloc_equality(LS);
    C = C.set_coefficient_si(isl::dim::in, i + 1, 1);
    C = C.set_coefficient_si(isl::dim::out, i + 1, -1);
    C = C.set_coefficient_si(isl::dim::param, ParamLocation, 1);
    MapTwo = MapTwo.add_constraint(C);
    MapTwo = MapTwo.upper_bound_si(isl::dim::in, i + 1, -1);

    MapOne = MapOne.unite(MapTwo);
    NewAccessRelation = NewAccessRelation.apply_range(MapOne);
  }

  isl::id BaseAddrId = getScopArrayInfo()->getBasePtrId();
  isl::space Space = Statement->getDomainSpace();
  NewAccessRelation = NewAccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  NewAccessRelation = NewAccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
  NewAccessRelation = NewAccessRelation.gist_domain(Statement->getDomain());

  // Access dimension folding might in certain cases increase the number of
  // disjuncts in the memory access, which can possibly complicate the
  // generated run-time checks and can lead to costly compilation.
  if (PollyPreciseFoldAccesses ||
      isl_map_n_basic_map(NewAccessRelation.get()) <=
          isl_map_n_basic_map(AccessRelation.get()))
    AccessRelation = NewAccessRelation;
}
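// A sketch of the folding above (illustrative): for an access
// { S[i,j] -> A[i][-1 + j] } to an array whose inner dimension has size m, a
// negative inner index borrows from the next outer dimension:
//
//   { S[i,j] -> A[i][-1 + j] : j >= 1 } union
//   { S[i,j] -> A[-1 + i][m - 1 + j] : j <= 0 }
//
// so that all inner subscripts are again non-negative and within [0, m).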
/// Check if @p Expr is divisible by @p Size.
static bool isDivisible(const SCEV *Expr, unsigned Size, ScalarEvolution &SE) {
  assert(Size != 0);
  if (Size == 1)
    return true;

  // Only one factor needs to be divisible.
  if (auto *MulExpr = dyn_cast<SCEVMulExpr>(Expr)) {
    for (auto *FactorExpr : MulExpr->operands())
      if (isDivisible(FactorExpr, Size, SE))
        return true;
    return false;
  }

  // For other n-ary expressions (Add, AddRec, Max,...) all operands need
  // to be divisible.
  if (auto *NAryExpr = dyn_cast<SCEVNAryExpr>(Expr)) {
    for (auto *OpExpr : NAryExpr->operands())
      if (!isDivisible(OpExpr, Size, SE))
        return false;
    return true;
  }

  auto *SizeSCEV = SE.getConstant(Expr->getType(), Size);
  auto *UDivSCEV = SE.getUDivExpr(Expr, SizeSCEV);
  auto *MulSCEV = SE.getMulExpr(UDivSCEV, SizeSCEV);
  return MulSCEV == Expr;
}

void MemoryAccess::buildAccessRelation(const ScopArrayInfo *SAI) {
  assert(AccessRelation.is_null() && "AccessRelation already built");

  // Initialize the invalid domain which describes all iterations for which
  // the access relation is not modeled correctly.
  isl::set StmtInvalidDomain = getStatement()->getInvalidDomain();
  InvalidDomain = isl::set::empty(StmtInvalidDomain.get_space());

  isl::ctx Ctx = Id.get_ctx();
  isl::id BaseAddrId = SAI->getBasePtrId();

  if (getAccessInstruction() && isa<MemIntrinsic>(getAccessInstruction())) {
    buildMemIntrinsicAccessRelation();
    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  if (!isAffine()) {
    // We overapproximate non-affine accesses with a possible access to the
    // whole array. For read accesses it does not make a difference whether
    // an access must or may happen. However, for write accesses it is
    // important to differentiate between writes that must happen and writes
    // that may happen.
    if (AccessRelation.is_null())
      AccessRelation = createBasicAccessMap(Statement);

    AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);
    return;
  }

  isl::space Space = isl::space(Ctx, 0, Statement->getNumIterators(), 0);
  AccessRelation = isl::map::universe(Space);

  for (int i = 0, Size = Subscripts.size(); i < Size; ++i) {
    isl::pw_aff Affine = getPwAff(Subscripts[i]);
    isl::map SubscriptMap = isl::map::from_pw_aff(Affine);
    AccessRelation = AccessRelation.flat_range_product(SubscriptMap);
  }

  Space = Statement->getDomainSpace();
  AccessRelation = AccessRelation.set_tuple_id(
      isl::dim::in, Space.get_tuple_id(isl::dim::set));
  AccessRelation = AccessRelation.set_tuple_id(isl::dim::out, BaseAddrId);

  AccessRelation = AccessRelation.gist_domain(Statement->getDomain());
}
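// For illustration of the affine case above: given a statement with two loop
// iterators and the affine subscripts (i, 2j + 1), the loop builds the
// relation one subscript at a time via flat_range_product, yielding
// (hypothetical names)
//
//   { Stmt1[i, j] -> MemRef_A[i, 2j + 1] }.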
MemoryAccess::MemoryAccess(ScopStmt *Stmt, Instruction *AccessInst,
                           AccessType AccType, Value *BaseAddress,
                           Type *ElementType, bool Affine,
                           ArrayRef<const SCEV *> Subscripts,
                           ArrayRef<const SCEV *> Sizes, Value *AccessValue,
                           MemoryKind Kind)
    : Kind(Kind), AccType(AccType), Statement(Stmt), InvalidDomain(nullptr),
      BaseAddr(BaseAddress), ElementType(ElementType),
      Sizes(Sizes.begin(), Sizes.end()), AccessInstruction(AccessInst),
      AccessValue(AccessValue), IsAffine(Affine),
      Subscripts(Subscripts.begin(), Subscripts.end()),
      AccessRelation(nullptr), NewAccessRelation(nullptr), FAD(nullptr) {
  static const std::string TypeStrings[] = {"", "_Read", "_Write",
                                            "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}

MemoryAccess::MemoryAccess(ScopStmt *Stmt, AccessType AccType, isl::map AccRel)
    : Kind(MemoryKind::Array), AccType(AccType), Statement(Stmt),
      InvalidDomain(nullptr), AccessRelation(nullptr),
      NewAccessRelation(AccRel), FAD(nullptr) {
  isl::id ArrayInfoId = NewAccessRelation.get_tuple_id(isl::dim::out);
  auto *SAI = ScopArrayInfo::getFromId(ArrayInfoId);
  Sizes.push_back(nullptr);
  for (unsigned i = 1; i < SAI->getNumberOfDimensions(); i++)
    Sizes.push_back(SAI->getDimensionSize(i));
  ElementType = SAI->getElementType();
  BaseAddr = SAI->getBasePtr();
  static const std::string TypeStrings[] = {"", "_Read", "_Write",
                                            "_MayWrite"};
  const std::string Access = TypeStrings[AccType] + utostr(Stmt->size());

  std::string IdName = Stmt->getBaseName() + Access;
  Id = isl::id::alloc(Stmt->getParent()->getIslCtx(), IdName, this);
}

MemoryAccess::~MemoryAccess() = default;

void MemoryAccess::realignParams() {
  isl::set Ctx = Statement->getParent()->getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  AccessRelation = AccessRelation.gist_params(Ctx);
}

const std::string MemoryAccess::getReductionOperatorStr() const {
  return MemoryAccess::getReductionOperatorStr(getReductionType());
}

isl::id MemoryAccess::getId() const { return Id; }

raw_ostream &polly::operator<<(raw_ostream &OS,
                               MemoryAccess::ReductionType RT) {
  if (RT == MemoryAccess::RT_NONE)
    OS << "NONE";
  else
    OS << MemoryAccess::getReductionOperatorStr(RT);
  return OS;
}

void MemoryAccess::setFortranArrayDescriptor(Value *FAD) { this->FAD = FAD; }

void MemoryAccess::print(raw_ostream &OS) const {
  switch (AccType) {
  case READ:
    OS.indent(12) << "ReadAccess :=\t";
    break;
  case MUST_WRITE:
    OS.indent(12) << "MustWriteAccess :=\t";
    break;
  case MAY_WRITE:
    OS.indent(12) << "MayWriteAccess :=\t";
    break;
  }

  OS << "[Reduction Type: " << getReductionType() << "] ";

  if (FAD) {
    OS << "[Fortran array descriptor: " << FAD->getName();
    OS << "] ";
  }

  OS << "[Scalar: " << isScalarKind() << "]\n";
  OS.indent(16) << getOriginalAccessRelationStr() << ";\n";
  if (hasNewAccessRelation())
    OS.indent(11) << "new: " << getNewAccessRelationStr() << ";\n";
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MemoryAccess::dump() const { print(errs()); }
#endif

isl::pw_aff MemoryAccess::getPwAff(const SCEV *E) {
  auto *Stmt = getStatement();
  PWACtx PWAC = Stmt->getParent()->getPwAff(E, Stmt->getEntryBlock());
  isl::set StmtDom = getStatement()->getDomain();
  StmtDom = StmtDom.reset_tuple_id();
  isl::set NewInvalidDom = StmtDom.intersect(PWAC.second);
  InvalidDomain = InvalidDomain.unite(NewInvalidDom);
  return PWAC.first;
}

// Create a map in the size of the provided set domain, that maps from one
// element of the provided set domain to another element of the provided set
// domain.
// The mapping is limited to all points that are equal in all but the last
// dimension and for which the last dimension of the input is strictly
// smaller than the last dimension of the output.
//
// getEqualAndLarger(set[i0, i1, ..., iX]):
//
// set[i0, i1, ..., iX] -> set[o0, o1, ..., oX]
//   : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1), iX < oX
//
static isl::map getEqualAndLarger(isl::space SetDomain) {
  isl::space Space = SetDomain.map_from_set();
  isl::map Map = isl::map::universe(Space);
  unsigned lastDimension = Map.dim(isl::dim::in) - 1;

  // Set all but the last dimension to be equal for the input and output
  //
  //   input[i0, i1, ..., iX] -> output[o0, o1, ..., oX]
  //     : i0 = o0, i1 = o1, ..., i(X-1) = o(X-1)
  for (unsigned i = 0; i < lastDimension; ++i)
    Map = Map.equate(isl::dim::in, i, isl::dim::out, i);

  // Set the last dimension of the input to be strictly smaller than the
  // last dimension of the output.
  //
  //   input[?,?,?,...,iX] -> output[?,?,?,...,oX] : iX < oX
  Map = Map.order_lt(isl::dim::in, lastDimension, isl::dim::out,
                     lastDimension);
  return Map;
}

isl::set MemoryAccess::getStride(isl::map Schedule) const {
  isl::map AccessRelation = getAccessRelation();
  isl::space Space = Schedule.get_space().range();
  isl::map NextScatt = getEqualAndLarger(Space);

  Schedule = Schedule.reverse();
  NextScatt = NextScatt.lexmin();

  NextScatt = NextScatt.apply_range(Schedule);
  NextScatt = NextScatt.apply_range(AccessRelation);
  NextScatt = NextScatt.apply_domain(Schedule);
  NextScatt = NextScatt.apply_domain(AccessRelation);

  isl::set Deltas = NextScatt.deltas();
  return Deltas;
}

bool MemoryAccess::isStrideX(isl::map Schedule, int StrideWidth) const {
  isl::set Stride, StrideX;
  bool IsStrideX;

  Stride = getStride(Schedule);
  StrideX = isl::set::universe(Stride.get_space());
  for (unsigned i = 0; i < StrideX.dim(isl::dim::set) - 1; i++)
    StrideX = StrideX.fix_si(isl::dim::set, i, 0);
  StrideX = StrideX.fix_si(isl::dim::set, StrideX.dim(isl::dim::set) - 1,
                           StrideWidth);
  IsStrideX = Stride.is_subset(StrideX);

  return IsStrideX;
}

bool MemoryAccess::isStrideZero(isl::map Schedule) const {
  return isStrideX(Schedule, 0);
}

bool MemoryAccess::isStrideOne(isl::map Schedule) const {
  return isStrideX(Schedule, 1);
}
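// For intuition (illustrative): with schedule { S[i] -> [i] } and access
// { S[i] -> MemRef_A[2i] }, two lexicographically adjacent schedule points
// access elements 2i and 2i + 2, so getStride returns { [2] } and
// isStrideOne is false; for { S[i] -> MemRef_A[i] } it returns { [1] } and
// the access is stride one.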
void MemoryAccess::setAccessRelation(isl::map NewAccess) {
  AccessRelation = NewAccess;
}

void MemoryAccess::setNewAccessRelation(isl::map NewAccess) {
  assert(NewAccess);
#ifndef NDEBUG
  // Check domain space compatibility.
  isl::space NewSpace = NewAccess.get_space();
  isl::space NewDomainSpace = NewSpace.domain();
  isl::space OriginalDomainSpace = getStatement()->getDomainSpace();
  assert(OriginalDomainSpace.has_equal_tuples(NewDomainSpace));

  // Reads must be executed unconditionally. Writes might be executed in a
  // subdomain only.
  if (isRead()) {
    // Check whether there is an access for every statement instance.
    isl::set StmtDomain = getStatement()->getDomain();
    StmtDomain = StmtDomain.intersect_params(
        getStatement()->getParent()->getContext());
    isl::set NewDomain = NewAccess.domain();
    assert(StmtDomain.is_subset(NewDomain) &&
           "Partial READ accesses not supported");
  }

  isl::space NewAccessSpace = NewAccess.get_space();
  assert(NewAccessSpace.has_tuple_id(isl::dim::set) &&
         "Must specify the array that is accessed");
  isl::id NewArrayId = NewAccessSpace.get_tuple_id(isl::dim::set);
  auto *SAI = static_cast<ScopArrayInfo *>(NewArrayId.get_user());
  assert(SAI && "Must set a ScopArrayInfo");

  if (SAI->isArrayKind() && SAI->getBasePtrOriginSAI()) {
    InvariantEquivClassTy *EqClass =
        getStatement()->getParent()->lookupInvariantEquivClass(
            SAI->getBasePtr());
    assert(EqClass &&
           "Access functions to indirect arrays must have an invariant and "
           "hoisted base pointer");
  }

  // Check whether access dimensions correspond to the number of dimensions
  // of the accessed array.
  auto Dims = SAI->getNumberOfDimensions();
  assert(NewAccessSpace.dim(isl::dim::set) == Dims &&
         "Access dims must match array dims");
#endif

  NewAccess = NewAccess.gist_domain(getStatement()->getDomain());
  NewAccessRelation = NewAccess;
}

bool MemoryAccess::isLatestPartialAccess() const {
  isl::set StmtDom = getStatement()->getDomain();
  isl::set AccDom = getLatestAccessRelation().domain();

  return !StmtDom.is_subset(AccDom);
}

//===----------------------------------------------------------------------===//

isl::map ScopStmt::getSchedule() const {
  isl::set Domain = getDomain();
  if (Domain.is_empty())
    return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
  auto Schedule = getParent()->getSchedule();
  if (!Schedule)
    return nullptr;
  Schedule = Schedule.intersect_domain(isl::union_set(Domain));
  if (Schedule.is_empty())
    return isl::map::from_aff(isl::aff(isl::local_space(getDomainSpace())));
  isl::map M = isl::map::from_union_map(Schedule);
  M = M.coalesce();
  M = M.gist_domain(Domain);
  M = M.coalesce();
  return M;
}

void ScopStmt::restrictDomain(isl::set NewDomain) {
  assert(NewDomain.is_subset(Domain) &&
         "New domain is not a subset of old domain!");
  Domain = NewDomain;
}

void ScopStmt::addAccess(MemoryAccess *Access, bool Prepend) {
  Instruction *AccessInst = Access->getAccessInstruction();

  if (Access->isArrayKind()) {
    MemoryAccessList &MAL = InstructionToAccess[AccessInst];
    MAL.emplace_front(Access);
  } else if (Access->isValueKind() && Access->isWrite()) {
    Instruction *AccessVal = cast<Instruction>(Access->getAccessValue());
    assert(!ValueWrites.lookup(AccessVal));

    ValueWrites[AccessVal] = Access;
  } else if (Access->isValueKind() && Access->isRead()) {
    Value *AccessVal = Access->getAccessValue();
    assert(!ValueReads.lookup(AccessVal));

    ValueReads[AccessVal] = Access;
  } else if (Access->isAnyPHIKind() && Access->isWrite()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIWrites.lookup(PHI));

    PHIWrites[PHI] = Access;
  } else if (Access->isAnyPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessValue());
    assert(!PHIReads.lookup(PHI));

    PHIReads[PHI] = Access;
  }

  if (Prepend) {
    MemAccs.insert(MemAccs.begin(), Access);
    return;
  }
  MemAccs.push_back(Access);
}

void ScopStmt::realignParams() {
  for (MemoryAccess *MA : *this)
    MA->realignParams();

  isl::set Ctx = Parent.getContext();
  InvalidDomain = InvalidDomain.gist_params(Ctx);
  Domain = Domain.gist_params(Ctx);
}

/// Add @p BSet to set @p BoundedParts if @p BSet is bounded.
static isl::set collectBoundedParts(isl::set S) {
  isl::set BoundedParts = isl::set::empty(S.get_space());
  S.foreach_basic_set([&BoundedParts](isl::basic_set BSet) -> isl::stat {
    if (BSet.is_bounded()) {
      BoundedParts = BoundedParts.unite(isl::set(BSet));
    }
    return isl::stat::ok;
  });
  return BoundedParts;
}
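// For intuition about partitionSetParts below (illustrative): given
// S = { [i] : 0 <= i <= 9 or i >= 10 } and Dim = 0, the bounded part is
// { [i] : 0 <= i <= 9 } and the unbounded part { [i] : i >= 10 }; the
// artificial parameters introduced in the function only make the dimensions
// smaller than Dim appear bounded while partitioning.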
/// Compute the (un)bounded parts of @p S wrt. to dimension @p Dim.
///
/// @returns A separation of @p S into first an unbounded then a bounded
///          subset, both with regards to the dimension @p Dim.
static std::pair<isl::set, isl::set> partitionSetParts(isl::set S,
                                                       unsigned Dim) {
  for (unsigned u = 0, e = S.n_dim(); u < e; u++)
    S = S.lower_bound_si(isl::dim::set, u, 0);

  unsigned NumDimsS = S.n_dim();
  isl::set OnlyDimS = S;

  // Remove dimensions that are greater than Dim as they are not interesting.
  assert(NumDimsS >= Dim + 1);
  OnlyDimS = OnlyDimS.project_out(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Create artificial parametric upper bounds for dimensions smaller than
  // Dim as we are not interested in them.
  OnlyDimS = OnlyDimS.insert_dims(isl::dim::param, 0, Dim);

  for (unsigned u = 0; u < Dim; u++) {
    isl::constraint C = isl::constraint::alloc_inequality(
        isl::local_space(OnlyDimS.get_space()));
    C = C.set_coefficient_si(isl::dim::param, u, 1);
    C = C.set_coefficient_si(isl::dim::set, u, -1);
    OnlyDimS = OnlyDimS.add_constraint(C);
  }

  // Collect all bounded parts of OnlyDimS.
  isl::set BoundedParts = collectBoundedParts(OnlyDimS);

  // Create the dimensions greater than Dim again.
  BoundedParts =
      BoundedParts.insert_dims(isl::dim::set, Dim + 1, NumDimsS - Dim - 1);

  // Remove the artificial upper bound parameters again.
  BoundedParts = BoundedParts.remove_dims(isl::dim::param, 0, Dim);

  isl::set UnboundedParts = S.subtract(BoundedParts);
  return std::make_pair(UnboundedParts, BoundedParts);
}

/// Create the conditions under which @p L @p Pred @p R is true.
static isl::set buildConditionSet(ICmpInst::Predicate Pred, isl::pw_aff L,
                                  isl::pw_aff R) {
  switch (Pred) {
  case ICmpInst::ICMP_EQ:
    return L.eq_set(R);
  case ICmpInst::ICMP_NE:
    return L.ne_set(R);
  case ICmpInst::ICMP_SLT:
    return L.lt_set(R);
  case ICmpInst::ICMP_SLE:
    return L.le_set(R);
  case ICmpInst::ICMP_SGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_SGE:
    return L.ge_set(R);
  case ICmpInst::ICMP_ULT:
    return L.lt_set(R);
  case ICmpInst::ICMP_UGT:
    return L.gt_set(R);
  case ICmpInst::ICMP_ULE:
    return L.le_set(R);
  case ICmpInst::ICMP_UGE:
    return L.ge_set(R);
  default:
    llvm_unreachable("Non integer predicate not supported");
  }
}

/// Compute the isl representation for the SCEV @p E in this BB.
///
/// @param S                The Scop in which @p BB resides in.
/// @param BB               The BB for which isl representation is to be
///                         computed.
/// @param InvalidDomainMap A map of BB to their invalid domains.
/// @param E                The SCEV that should be translated.
/// @param NonNegative      Flag to indicate the @p E has to be non-negative.
///
/// Note that this function will also adjust the invalid context accordingly.
__isl_give isl_pw_aff *
getPwAff(Scop &S, BasicBlock *BB,
         DenseMap<BasicBlock *, isl::set> &InvalidDomainMap, const SCEV *E,
         bool NonNegative = false) {
  PWACtx PWAC = S.getPwAff(E, BB, NonNegative);
  InvalidDomainMap[BB] = InvalidDomainMap[BB].unite(PWAC.second);
  return PWAC.first.release();
}

/// Build the conditions sets for the switch @p SI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p SI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p SI has successors.
bool buildConditionSets(Scop &S, BasicBlock *BB, SwitchInst *SI, Loop *L,
                        __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  Value *Condition = getConditionFromTerminator(SI);
  assert(Condition && "No condition for switch");

  ScalarEvolution &SE = *S.getSE();
  isl_pw_aff *LHS, *RHS;
  LHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEVAtScope(Condition, L));

  unsigned NumSuccessors = SI->getNumSuccessors();
  ConditionSets.resize(NumSuccessors);
  for (auto &Case : SI->cases()) {
    unsigned Idx = Case.getSuccessorIndex();
    ConstantInt *CaseValue = Case.getCaseValue();

    RHS = getPwAff(S, BB, InvalidDomainMap, SE.getSCEV(CaseValue));
    isl_set *CaseConditionSet =
        buildConditionSet(ICmpInst::ICMP_EQ, isl::manage_copy(LHS),
                          isl::manage(RHS))
            .release();
    ConditionSets[Idx] = isl_set_coalesce(
        isl_set_intersect(CaseConditionSet, isl_set_copy(Domain)));
  }

  assert(ConditionSets[0] == nullptr && "Default condition set was set");
  isl_set *ConditionSetUnion = isl_set_copy(ConditionSets[1]);
  for (unsigned u = 2; u < NumSuccessors; u++)
    ConditionSetUnion =
        isl_set_union(ConditionSetUnion, isl_set_copy(ConditionSets[u]));
  ConditionSets[0] = isl_set_subtract(isl_set_copy(Domain), ConditionSetUnion);

  isl_pw_aff_free(LHS);

  return true;
}

/// Build condition sets for unsigned ICmpInst(s).
/// Special handling is required for unsigned operands to ensure that if
/// the MSB (aka the sign bit) is set for an operand in an unsigned ICmpInst,
/// it wraps around.
///
/// @param IsStrictUpperBound holds information on the predicate relation
///        between TestVal and UpperBound, i.e.,
///        TestVal < UpperBound  OR  TestVal <= UpperBound
__isl_give isl_set *
buildUnsignedConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                           __isl_keep isl_set *Domain, const SCEV *SCEV_TestVal,
                           const SCEV *SCEV_UpperBound,
                           DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                           bool IsStrictUpperBound) {
  // Do not take the NonNeg assumption on TestVal
  // as it might have the MSB (sign bit) set.
  isl_pw_aff *TestVal = getPwAff(S, BB, InvalidDomainMap, SCEV_TestVal, false);
  // Take the NonNeg assumption on UpperBound.
  isl_pw_aff *UpperBound =
      getPwAff(S, BB, InvalidDomainMap, SCEV_UpperBound, true);

  // 0 <= TestVal
  isl_set *First =
      isl_pw_aff_le_set(isl_pw_aff_zero_on_domain(isl_local_space_from_space(
                            isl_pw_aff_get_domain_space(TestVal))),
                        isl_pw_aff_copy(TestVal));

  isl_set *Second;
  if (IsStrictUpperBound)
    // TestVal < UpperBound
    Second = isl_pw_aff_lt_set(TestVal, UpperBound);
  else
    // TestVal <= UpperBound
    Second = isl_pw_aff_le_set(TestVal, UpperBound);

  isl_set *ConsequenceCondSet = isl_set_intersect(First, Second);
  return ConsequenceCondSet;
}
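// For illustration: the unsigned comparison `i <u N` cannot be translated as
// a signed comparison directly, because a negative signed value of i
// corresponds to a very large unsigned value. buildUnsignedConditionSets
// above therefore produces
//
//   { [i, N] : 0 <= i and i < N }
//
// taking non-negativity of N as an assumption while requiring 0 <= i
// explicitly; under these conditions the signed and unsigned orderings
// agree.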
/// Build the conditions sets for the branch condition @p Condition in
/// the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors. If @p TI is nullptr the
/// context under which @p Condition is true/false will be returned as the
/// new elements of @p ConditionSets.
bool buildConditionSets(Scop &S, BasicBlock *BB, Value *Condition,
                        TerminatorInst *TI, Loop *L, __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  ScalarEvolution &SE = *S.getSE();
  isl_set *ConsequenceCondSet = nullptr;

  if (auto Load = dyn_cast<LoadInst>(Condition)) {
    const SCEV *LHSSCEV = SE.getSCEVAtScope(Load, L);
    const SCEV *RHSSCEV = SE.getZero(LHSSCEV->getType());
    bool NonNeg = false;
    isl_pw_aff *LHS = getPwAff(S, BB, InvalidDomainMap, LHSSCEV, NonNeg);
    isl_pw_aff *RHS = getPwAff(S, BB, InvalidDomainMap, RHSSCEV, NonNeg);
    ConsequenceCondSet = buildConditionSet(ICmpInst::ICMP_SLE, isl::manage(LHS),
                                           isl::manage(RHS))
                             .release();
  } else if (auto *PHI = dyn_cast<PHINode>(Condition)) {
    auto *Unique = dyn_cast<ConstantInt>(
        getUniqueNonErrorValue(PHI, &S.getRegion(), *S.getLI(), *S.getDT()));

    if (Unique->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (auto *CCond = dyn_cast<ConstantInt>(Condition)) {
    if (CCond->isZero())
      ConsequenceCondSet = isl_set_empty(isl_set_get_space(Domain));
    else
      ConsequenceCondSet = isl_set_universe(isl_set_get_space(Domain));
  } else if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Condition)) {
    auto Opcode = BinOp->getOpcode();
    assert(Opcode == Instruction::And || Opcode == Instruction::Or);

    bool Valid = buildConditionSets(S, BB, BinOp->getOperand(0), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets) &&
                 buildConditionSets(S, BB, BinOp->getOperand(1), TI, L, Domain,
                                    InvalidDomainMap, ConditionSets);
    if (!Valid) {
      while (!ConditionSets.empty())
        isl_set_free(ConditionSets.pop_back_val());
      return false;
    }

    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart0 = ConditionSets.pop_back_val();
    isl_set_free(ConditionSets.pop_back_val());
    isl_set *ConsCondPart1 = ConditionSets.pop_back_val();

    if (Opcode == Instruction::And)
      ConsequenceCondSet = isl_set_intersect(ConsCondPart0, ConsCondPart1);
    else
      ConsequenceCondSet = isl_set_union(ConsCondPart0, ConsCondPart1);
  } else {
    auto *ICond = dyn_cast<ICmpInst>(Condition);
    assert(ICond &&
           "Condition of exiting branch was neither constant nor ICmp!");

    LoopInfo &LI = *S.getLI();
    DominatorTree &DT = *S.getDT();
    Region &R = S.getRegion();

    isl_pw_aff *LHS, *RHS;
    // For unsigned comparisons we assumed the sign bit of neither operand
    // to be set. The comparison is equal to a signed comparison under this
    // assumption.
    bool NonNeg = ICond->isUnsigned();
    const SCEV *LeftOperand = SE.getSCEVAtScope(ICond->getOperand(0), L),
               *RightOperand = SE.getSCEVAtScope(ICond->getOperand(1), L);

    LeftOperand = tryForwardThroughPHI(LeftOperand, R, SE, LI, DT);
    RightOperand = tryForwardThroughPHI(RightOperand, R, SE, LI, DT);

    switch (ICond->getPredicate()) {
    case ICmpInst::ICMP_ULT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_ULE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, LeftOperand,
                                     RightOperand, InvalidDomainMap, false);
      break;
    case ICmpInst::ICMP_UGT:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, true);
      break;
    case ICmpInst::ICMP_UGE:
      ConsequenceCondSet =
          buildUnsignedConditionSets(S, BB, Condition, Domain, RightOperand,
                                     LeftOperand, InvalidDomainMap, false);
      break;
    default:
      LHS = getPwAff(S, BB, InvalidDomainMap, LeftOperand, NonNeg);
      RHS = getPwAff(S, BB, InvalidDomainMap, RightOperand, NonNeg);
      ConsequenceCondSet = buildConditionSet(ICond->getPredicate(),
                                             isl::manage(LHS), isl::manage(RHS))
                               .release();
      break;
    }
  }

  // If no terminator was given we are only looking for parameter constraints
  // under which @p Condition is true/false.
  if (!TI)
    ConsequenceCondSet = isl_set_params(ConsequenceCondSet);
  assert(ConsequenceCondSet);
  ConsequenceCondSet = isl_set_coalesce(
      isl_set_intersect(ConsequenceCondSet, isl_set_copy(Domain)));

  isl_set *AlternativeCondSet = nullptr;
  bool TooComplex =
      isl_set_n_basic_set(ConsequenceCondSet) >= MaxDisjunctsInDomain;

  if (!TooComplex) {
    AlternativeCondSet = isl_set_subtract(isl_set_copy(Domain),
                                          isl_set_copy(ConsequenceCondSet));
    TooComplex =
        isl_set_n_basic_set(AlternativeCondSet) >= MaxDisjunctsInDomain;
  }

  if (TooComplex) {
    S.invalidate(COMPLEXITY, TI ? TI->getDebugLoc() : DebugLoc(),
                 TI ? TI->getParent() : nullptr /* BasicBlock */);
    isl_set_free(AlternativeCondSet);
    isl_set_free(ConsequenceCondSet);
    return false;
  }

  ConditionSets.push_back(ConsequenceCondSet);
  ConditionSets.push_back(isl_set_coalesce(AlternativeCondSet));

  return true;
}
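// For illustration: for a branch on `a && b`, the recursive calls above
// produce the condition sets of both operands, and the consequence set is
// their intersection, e.g.
//
//   C_a = { [i] : i < N },  C_b = { [i] : i >= 10 }
//   ConsequenceCondSet = { [i] : 10 <= i < N }
//
// while the alternative set is the domain minus that intersection. If either
// set accumulates MaxDisjunctsInDomain or more disjuncts, the SCoP is
// invalidated as too complex rather than risking excessive compile time.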
/// Build the conditions sets for the terminator @p TI in the @p Domain.
///
/// This will fill @p ConditionSets with the conditions under which control
/// will be moved from @p TI to its successors. Hence, @p ConditionSets will
/// have as many elements as @p TI has successors.
bool buildConditionSets(Scop &S, BasicBlock *BB, TerminatorInst *TI, Loop *L,
                        __isl_keep isl_set *Domain,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap,
                        SmallVectorImpl<__isl_give isl_set *> &ConditionSets) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI))
    return buildConditionSets(S, BB, SI, L, Domain, InvalidDomainMap,
                              ConditionSets);

  assert(isa<BranchInst>(TI) && "Terminator was neither branch nor switch.");

  if (TI->getNumSuccessors() == 1) {
    ConditionSets.push_back(isl_set_copy(Domain));
    return true;
  }

  Value *Condition = getConditionFromTerminator(TI);
  assert(Condition && "No condition for Terminator");

  return buildConditionSets(S, BB, Condition, TI, L, Domain, InvalidDomainMap,
                            ConditionSets);
}

ScopStmt::ScopStmt(Scop &parent, Region &R, StringRef Name,
                   Loop *SurroundingLoop,
                   std::vector<Instruction *> EntryBlockInstructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), R(&R),
      Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
      Instructions(EntryBlockInstructions) {}

ScopStmt::ScopStmt(Scop &parent, BasicBlock &bb, StringRef Name,
                   Loop *SurroundingLoop,
                   std::vector<Instruction *> Instructions)
    : Parent(parent), InvalidDomain(nullptr), Domain(nullptr), BB(&bb),
      Build(nullptr), BaseName(Name), SurroundingLoop(SurroundingLoop),
      Instructions(Instructions) {}

ScopStmt::ScopStmt(Scop &parent, isl::map SourceRel, isl::map TargetRel,
                   isl::set NewDomain)
    : Parent(parent), InvalidDomain(nullptr), Domain(NewDomain),
      Build(nullptr) {
  BaseName = getIslCompatibleName("CopyStmt_", "",
                                  std::to_string(parent.getCopyStmtsNum()));
  isl::id Id = isl::id::alloc(getIslCtx(), getBaseName(), this);
  Domain = Domain.set_tuple_id(Id);
  TargetRel = TargetRel.set_tuple_id(isl::dim::in, Id);
  auto *Access =
      new MemoryAccess(this, MemoryAccess::AccessType::MUST_WRITE, TargetRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
  SourceRel = SourceRel.set_tuple_id(isl::dim::in, Id);
  Access = new MemoryAccess(this, MemoryAccess::AccessType::READ, SourceRel);
  parent.addAccessFunction(Access);
  addAccess(Access);
}

ScopStmt::~ScopStmt() = default;

std::string ScopStmt::getDomainStr() const { return Domain.to_str(); }

std::string ScopStmt::getScheduleStr() const {
  auto *S = getSchedule().release();
  if (!S)
    return {};
  auto Str = stringFromIslObj(S);
  isl_map_free(S);
  return Str;
}

void ScopStmt::setInvalidDomain(isl::set ID) { InvalidDomain = ID; }

BasicBlock *ScopStmt::getEntryBlock() const {
  if (isBlockStmt())
    return getBasicBlock();
  return getRegion()->getEntry();
}

unsigned ScopStmt::getNumIterators() const { return NestLoops.size(); }

const char *ScopStmt::getBaseName() const { return BaseName.c_str(); }

Loop *ScopStmt::getLoopForDimension(unsigned Dimension) const {
  return NestLoops[Dimension];
}

isl::ctx ScopStmt::getIslCtx() const { return Parent.getIslCtx(); }

isl::set ScopStmt::getDomain() const { return Domain; }

isl::space ScopStmt::getDomainSpace() const { return Domain.get_space(); }

isl::id ScopStmt::getDomainId() const { return Domain.get_tuple_id(); }

void ScopStmt::printInstructions(raw_ostream &OS) const {
  OS << "Instructions {\n";

  for (Instruction *Inst : Instructions)
    OS.indent(16) << *Inst << "\n";

  OS.indent(12) << "}\n";
}
(PrintInstructions) printInstructions(OS.indent(12)); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ScopStmt::dump() const { print(dbgs(), true); } #endif void ScopStmt::removeAccessData(MemoryAccess *MA) { if (MA->isRead() && MA->isOriginalValueKind()) { bool Found = ValueReads.erase(MA->getAccessValue()); (void)Found; assert(Found && "Expected access data not found"); } if (MA->isWrite() && MA->isOriginalValueKind()) { bool Found = ValueWrites.erase(cast(MA->getAccessValue())); (void)Found; assert(Found && "Expected access data not found"); } if (MA->isWrite() && MA->isOriginalAnyPHIKind()) { bool Found = PHIWrites.erase(cast(MA->getAccessInstruction())); (void)Found; assert(Found && "Expected access data not found"); } if (MA->isRead() && MA->isOriginalAnyPHIKind()) { bool Found = PHIReads.erase(cast(MA->getAccessInstruction())); (void)Found; assert(Found && "Expected access data not found"); } } void ScopStmt::removeMemoryAccess(MemoryAccess *MA) { // Remove the memory accesses from this statement together with all scalar // accesses that were caused by it. MemoryKind::Value READs have no access // instruction, hence would not be removed by this function. However, it is // only used for invariant LoadInst accesses, its arguments are always affine, // hence synthesizable, and therefore there are no MemoryKind::Value READ // accesses to be removed. auto Predicate = [&](MemoryAccess *Acc) { return Acc->getAccessInstruction() == MA->getAccessInstruction(); }; for (auto *MA : MemAccs) { if (Predicate(MA)) { removeAccessData(MA); Parent.removeAccessData(MA); } } MemAccs.erase(std::remove_if(MemAccs.begin(), MemAccs.end(), Predicate), MemAccs.end()); InstructionToAccess.erase(MA->getAccessInstruction()); } void ScopStmt::removeSingleMemoryAccess(MemoryAccess *MA, bool AfterHoisting) { if (AfterHoisting) { auto MAIt = std::find(MemAccs.begin(), MemAccs.end(), MA); assert(MAIt != MemAccs.end()); MemAccs.erase(MAIt); removeAccessData(MA); Parent.removeAccessData(MA); } auto It = InstructionToAccess.find(MA->getAccessInstruction()); if (It != InstructionToAccess.end()) { It->second.remove(MA); if (It->second.empty()) InstructionToAccess.erase(MA->getAccessInstruction()); } } MemoryAccess *ScopStmt::ensureValueRead(Value *V) { MemoryAccess *Access = lookupInputAccessOf(V); if (Access) return Access; ScopArrayInfo *SAI = Parent.getOrCreateScopArrayInfo(V, V->getType(), {}, MemoryKind::Value); Access = new MemoryAccess(this, nullptr, MemoryAccess::READ, V, V->getType(), true, {}, {}, V, MemoryKind::Value); Parent.addAccessFunction(Access); Access->buildAccessRelation(SAI); addAccess(Access); Parent.addAccessData(Access); return Access; } raw_ostream &polly::operator<<(raw_ostream &OS, const ScopStmt &S) { S.print(OS, PollyPrintInstructions); return OS; } //===----------------------------------------------------------------------===// /// Scop class implement void Scop::setContext(isl::set NewContext) { Context = NewContext.align_params(Context.get_space()); } namespace { /// Remap parameter values but keep AddRecs valid wrt. invariant loads. 
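///
/// Illustrative sketch: if the invariant-load equivalence classes map a load
/// %ld to its class representative %rep, an AddRec {%ld,+,4}<%L> is rewritten
/// to %rep + {0,+,4}<%L>, i.e., the start value is remapped while the
/// recurrence itself stays anchored to the original loop.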
struct SCEVSensitiveParameterRewriter
    : public SCEVRewriteVisitor<SCEVSensitiveParameterRewriter> {
  const ValueToValueMap &VMap;

public:
  SCEVSensitiveParameterRewriter(const ValueToValueMap &VMap,
                                 ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), VMap(VMap) {}

  static const SCEV *rewrite(const SCEV *E, ScalarEvolution &SE,
                             const ValueToValueMap &VMap) {
    SCEVSensitiveParameterRewriter SSPR(VMap, SE);
    return SSPR.visit(E);
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *E) {
    auto *Start = visit(E->getStart());
    auto *AddRec = SE.getAddRecExpr(SE.getConstant(E->getType(), 0),
                                    visit(E->getStepRecurrence(SE)),
                                    E->getLoop(), SCEV::FlagAnyWrap);
    return SE.getAddExpr(Start, AddRec);
  }

  const SCEV *visitUnknown(const SCEVUnknown *E) {
    if (auto *NewValue = VMap.lookup(E->getValue()))
      return SE.getUnknown(NewValue);
    return E;
  }
};

/// Check whether we should remap a SCEV expression.
struct SCEVFindInsideScop : public SCEVTraversal<SCEVFindInsideScop> {
  const ValueToValueMap &VMap;
  bool FoundInside = false;
  const Scop *S;

public:
  SCEVFindInsideScop(const ValueToValueMap &VMap, ScalarEvolution &SE,
                     const Scop *S)
      : SCEVTraversal(*this), VMap(VMap), S(S) {}

  static bool hasVariant(const SCEV *E, ScalarEvolution &SE,
                         const ValueToValueMap &VMap, const Scop *S) {
    SCEVFindInsideScop SFIS(VMap, SE, S);
    SFIS.visitAll(E);
    return SFIS.FoundInside;
  }

  bool follow(const SCEV *E) {
    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(E)) {
      FoundInside |= S->getRegion().contains(AddRec->getLoop());
    } else if (auto *Unknown = dyn_cast<SCEVUnknown>(E)) {
      if (Instruction *I = dyn_cast<Instruction>(Unknown->getValue()))
        FoundInside |= S->getRegion().contains(I) && !VMap.count(I);
    }
    return !FoundInside;
  }

  bool isDone() { return FoundInside; }
};
} // end anonymous namespace

const SCEV *Scop::getRepresentingInvariantLoadSCEV(const SCEV *E) const {
  // Check whether it makes sense to rewrite the SCEV.  (ScalarEvolution
  // doesn't like addition between an AddRec and an expression that
  // doesn't have a dominance relationship with it.)
  if (SCEVFindInsideScop::hasVariant(E, *SE, InvEquivClassVMap, this))
    return E;

  // Rewrite SCEV.
  return SCEVSensitiveParameterRewriter::rewrite(E, *SE, InvEquivClassVMap);
}

// This table of function names is used to translate parameter names into
// more human-readable names. This makes it easier to interpret Polly
// analysis results.
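//
// For example (illustrative), a parameter that is the result of
// `call i64 @_Z13get_global_idj(i32 0)` is printed as "__global_id_0" by
// getCallParamName below, instead of a generic name such as "p_3".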
StringMap<std::string> KnownNames = {
    {"_Z13get_global_idj", "global_id"},
    {"_Z12get_local_idj", "local_id"},
    {"_Z15get_global_sizej", "global_size"},
    {"_Z14get_local_sizej", "local_size"},
    {"_Z12get_work_dimv", "work_dim"},
    {"_Z17get_global_offsetj", "global_offset"},
    {"_Z12get_group_idj", "group_id"},
    {"_Z14get_num_groupsj", "num_groups"},
};

static std::string getCallParamName(CallInst *Call) {
  std::string Result;
  raw_string_ostream OS(Result);
  std::string Name = Call->getCalledFunction()->getName();

  auto Iterator = KnownNames.find(Name);
  if (Iterator != KnownNames.end())
    Name = "__" + Iterator->getValue();
  OS << Name;
  for (auto &Operand : Call->arg_operands()) {
    ConstantInt *Op = cast<ConstantInt>(&Operand);
    OS << "_" << Op->getValue();
  }
  OS.flush();
  return Result;
}

void Scop::createParameterId(const SCEV *Parameter) {
  assert(Parameters.count(Parameter));
  assert(!ParameterIds.count(Parameter));

  std::string ParameterName = "p_" + std::to_string(getNumParams() - 1);

  if (const SCEVUnknown *ValueParameter = dyn_cast<SCEVUnknown>(Parameter)) {
    Value *Val = ValueParameter->getValue();
    CallInst *Call = dyn_cast<CallInst>(Val);

    if (Call && isConstCall(Call)) {
      ParameterName = getCallParamName(Call);
    } else if (UseInstructionNames) {
      // If this parameter references a specific Value and this value has a
      // name we use this name as it is likely to be unique and more useful
      // than just a number.
      if (Val->hasName())
        ParameterName = Val->getName();
      else if (LoadInst *LI = dyn_cast<LoadInst>(Val)) {
        auto *LoadOrigin = LI->getPointerOperand()->stripInBoundsOffsets();
        if (LoadOrigin->hasName()) {
          ParameterName += "_loaded_from_";
          ParameterName +=
              LI->getPointerOperand()->stripInBoundsOffsets()->getName();
        }
      }
    }

    ParameterName = getIslCompatibleName("", ParameterName, "");
  }

  isl::id Id = isl::id::alloc(getIslCtx(), ParameterName,
                              const_cast<void *>((const void *)Parameter));
  ParameterIds[Parameter] = Id;
}

void Scop::addParams(const ParameterSetTy &NewParameters) {
  for (const SCEV *Parameter : NewParameters) {
    // Normalize the SCEV to get the representing element for an invariant
    // load.
    Parameter = extractConstantFactor(Parameter, *SE).second;
    Parameter = getRepresentingInvariantLoadSCEV(Parameter);

    if (Parameters.insert(Parameter))
      createParameterId(Parameter);
  }
}

isl::id Scop::getIdForParam(const SCEV *Parameter) const {
  // Normalize the SCEV to get the representing element for an invariant load.
  Parameter = getRepresentingInvariantLoadSCEV(Parameter);
  return ParameterIds.lookup(Parameter);
}

isl::set Scop::addNonEmptyDomainConstraints(isl::set C) const {
  isl::set DomainContext = getDomains().params();
  return C.intersect_params(DomainContext);
}

bool Scop::isDominatedBy(const DominatorTree &DT, BasicBlock *BB) const {
  return DT.dominates(BB, getEntry());
}

void Scop::addUserAssumptions(
    AssumptionCache &AC, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  for (auto &Assumption : AC.assumptions()) {
    auto *CI = dyn_cast_or_null<IntrinsicInst>(Assumption);
    if (!CI || CI->getNumArgOperands() != 1)
      continue;

    bool InScop = contains(CI);
    if (!InScop && !isDominatedBy(DT, CI->getParent()))
      continue;

    auto *L = LI.getLoopFor(CI->getParent());
    auto *Val = CI->getArgOperand(0);
    ParameterSetTy DetectedParams;
    if (!isAffineConstraint(Val, &R, L, *SE, DetectedParams)) {
      ORE.emit(
          OptimizationRemarkAnalysis(DEBUG_TYPE, "IgnoreUserAssumption", CI)
          << "Non-affine user assumption ignored.");
      continue;
    }

    // Collect all newly introduced parameters.
    ParameterSetTy NewParams;
    for (auto *Param : DetectedParams) {
      Param = extractConstantFactor(Param, *SE).second;
      Param = getRepresentingInvariantLoadSCEV(Param);
      if (Parameters.count(Param))
        continue;
      NewParams.insert(Param);
    }

    SmallVector<isl_set *, 2> ConditionSets;
    auto *TI = InScop ? CI->getParent()->getTerminator() : nullptr;
    BasicBlock *BB = InScop ? CI->getParent() : getRegion().getEntry();
    auto *Dom = InScop ? DomainMap[BB].copy() : Context.copy();
    assert(Dom && "Cannot propagate a nullptr.");
    bool Valid = buildConditionSets(*this, BB, Val, TI, L, Dom,
                                    InvalidDomainMap, ConditionSets);
    isl_set_free(Dom);

    if (!Valid)
      continue;

    isl_set *AssumptionCtx = nullptr;
    if (InScop) {
      AssumptionCtx = isl_set_complement(isl_set_params(ConditionSets[1]));
      isl_set_free(ConditionSets[0]);
    } else {
      AssumptionCtx = isl_set_complement(ConditionSets[1]);
      AssumptionCtx = isl_set_intersect(AssumptionCtx, ConditionSets[0]);
    }

    // Project out newly introduced parameters as they are not otherwise
    // useful.
    if (!NewParams.empty()) {
      for (unsigned u = 0; u < isl_set_n_param(AssumptionCtx); u++) {
        auto *Id = isl_set_get_dim_id(AssumptionCtx, isl_dim_param, u);
        auto *Param = static_cast<const SCEV *>(isl_id_get_user(Id));
        isl_id_free(Id);

        if (!NewParams.count(Param))
          continue;

        AssumptionCtx =
            isl_set_project_out(AssumptionCtx, isl_dim_param, u--, 1);
      }
    }
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "UserAssumption", CI)
             << "Use user assumption: " << stringFromIslObj(AssumptionCtx));
    Context = Context.intersect(isl::manage(AssumptionCtx));
  }
}

void Scop::addUserContext() {
  if (UserContextStr.empty())
    return;

  isl::set UserContext = isl::set(getIslCtx(), UserContextStr.c_str());
  isl::space Space = getParamSpace();
  if (Space.dim(isl::dim::param) != UserContext.dim(isl::dim::param)) {
    std::string SpaceStr = Space.to_str();
    errs() << "Error: the context provided in -polly-context does not have "
           << "the same number of dimensions as the computed context. Due to "
           << "this mismatch, the -polly-context option is ignored. Please "
           << "provide the context in the parameter space: " << SpaceStr
           << ".\n";
    return;
  }

  for (unsigned i = 0; i < Space.dim(isl::dim::param); i++) {
    std::string NameContext = Context.get_dim_name(isl::dim::param, i);
    std::string NameUserContext = UserContext.get_dim_name(isl::dim::param, i);

    if (NameContext != NameUserContext) {
      std::string SpaceStr = Space.to_str();
      errs() << "Error: the name of dimension " << i
             << " provided in -polly-context "
             << "is '" << NameUserContext << "', but the name in the computed "
             << "context is '" << NameContext
             << "'. Due to this name mismatch, "
             << "the -polly-context option is ignored. Please provide "
             << "the context in the parameter space: " << SpaceStr << ".\n";
      return;
    }

    UserContext = UserContext.set_dim_id(
        isl::dim::param, i, Space.get_dim_id(isl::dim::param, i));
  }

  Context = Context.intersect(UserContext);
}

void Scop::buildInvariantEquivalenceClasses() {
  DenseMap<std::pair<const SCEV *, Type *>, LoadInst *> EquivClasses;

  const InvariantLoadsSetTy &RIL = getRequiredInvariantLoads();
  for (LoadInst *LInst : RIL) {
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    Type *Ty = LInst->getType();
    LoadInst *&ClassRep = EquivClasses[std::make_pair(PointerSCEV, Ty)];
    if (ClassRep) {
      InvEquivClassVMap[LInst] = ClassRep;
      continue;
    }

    ClassRep = LInst;
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList(), nullptr, Ty});
  }
}

void Scop::buildContext() {
  isl::space Space = isl::space::params_alloc(getIslCtx(), 0);
  Context = isl::set::universe(Space);
  InvalidContext = isl::set::empty(Space);
  AssumedContext = isl::set::universe(Space);
}

void Scop::addParameterBounds() {
  unsigned PDim = 0;
  for (auto *Parameter : Parameters) {
    ConstantRange SRange = SE->getSignedRange(Parameter);
    Context = addRangeBoundsToSet(Context, SRange, PDim++, isl::dim::param);
  }
}

static std::vector<isl::id> getFortranArrayIds(Scop::array_range Arrays) {
  std::vector<isl::id> OutermostSizeIds;
  for (auto Array : Arrays) {
    // To check if an array is a Fortran array, we check if it has a
    // isl_pw_aff for its outermost dimension. Fortran arrays will have this
    // since the outermost dimension size can be picked up from their runtime
    // description.
    // TODO: actually need to check if it has a FAD, but for now this works.
    if (Array->getNumberOfDimensions() > 0) {
      isl::pw_aff PwAff = Array->getDimensionSizePw(0);
      if (!PwAff)
        continue;

      isl::id Id = PwAff.get_dim_id(isl::dim::param, 0);
      assert(!Id.is_null() &&
             "Invalid Id for PwAff expression in Fortran array");
      OutermostSizeIds.push_back(Id);
    }
  }
  return OutermostSizeIds;
}

// The FORTRAN array size parameters are known to be non-negative.
static isl::set boundFortranArrayParams(isl::set Context,
                                        Scop::array_range Arrays) {
  std::vector<isl::id> OutermostSizeIds;
  OutermostSizeIds = getFortranArrayIds(Arrays);

  for (isl::id Id : OutermostSizeIds) {
    int dim = Context.find_dim_by_id(isl::dim::param, Id);
    Context = Context.lower_bound_si(isl::dim::param, dim, 0);
  }

  return Context;
}

void Scop::realignParams() {
  if (PollyIgnoreParamBounds)
    return;

  // Add all parameters into a common model.
  isl::space Space = getFullParamSpace();

  // Align the parameters of all data structures to the model.
  Context = Context.align_params(Space);

  // Bound the size of the fortran array dimensions.
  Context = boundFortranArrayParams(Context, arrays());

  // As all parameters are known add bounds to them.
  addParameterBounds();

  for (ScopStmt &Stmt : *this)
    Stmt.realignParams();

  // Simplify the schedule according to the context too.
  Schedule = Schedule.gist_domain_params(getContext());
}

static isl::set simplifyAssumptionContext(isl::set AssumptionContext,
                                          const Scop &S) {
  // If we have modeled all blocks in the SCoP that have side effects we can
  // simplify the context with the constraints that are needed for anything to
  // be executed at all. However, if we have error blocks in the SCoP we
  // already assumed some parameter combinations cannot occur and removed them
  // from the domains, thus we cannot use the remaining domain to simplify the
  // assumptions.
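  //
  // Note: isl's gist_params, as used below, drops exactly those constraints
  // of the assumption context that are already implied by the given parameter
  // set (a reading aid on the mechanism, not specific to this file).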
  if (!S.hasErrorBlock()) {
    auto DomainParameters = S.getDomains().params();
    AssumptionContext = AssumptionContext.gist_params(DomainParameters);
  }

  AssumptionContext = AssumptionContext.gist_params(S.getContext());
  return AssumptionContext;
}

void Scop::simplifyContexts() {
  // The parameter constraints of the iteration domains give us a set of
  // constraints that need to hold for all cases where at least a single
  // statement iteration is executed in the whole scop. We now simplify the
  // assumed context under the assumption that such constraints hold and at
  // least a single statement iteration is executed. For cases where no
  // statement instances are executed, the assumptions we have taken about
  // the executed code do not matter and can be changed.
  //
  // WARNING: This only holds if the assumptions we have taken do not reduce
  //          the set of statement instances that are executed. Otherwise we
  //          may run into a case where the iteration domains suggest that
  //          for a certain set of parameter constraints no code is executed,
  //          but in the original program some computation would have been
  //          performed. In such a case, modifying the run-time conditions and
  //          possibly influencing the run-time check may cause certain scops
  //          to not be executed.
  //
  // Example:
  //
  //   When delinearizing the following code:
  //
  //     for (long i = 0; i < 100; i++)
  //       for (long j = 0; j < m; j++)
  //         A[i+p][j] = 1.0;
  //
  //   we assume that the condition m <= 0 or (m >= 1 and p >= 0) holds as
  //   otherwise we would access out of bound data. Now, knowing that code is
  //   only executed for the case m >= 0, it is sufficient to assume p >= 0.
  AssumedContext = simplifyAssumptionContext(AssumedContext, *this);
  InvalidContext = InvalidContext.align_params(getParamSpace());
}

/// Add the minimal/maximal access in @p Set to @p MinMaxAccesses.
static isl::stat buildMinMaxAccess(isl::set Set,
                                   Scop::MinMaxVectorTy &MinMaxAccesses,
                                   Scop &S) {
  isl::pw_multi_aff MinPMA, MaxPMA;
  isl::pw_aff LastDimAff;
  isl::aff OneAff;
  unsigned Pos;

  Set = Set.remove_divs();
  polly::simplify(Set);

  if (Set.n_basic_set() > RunTimeChecksMaxAccessDisjuncts)
    Set = Set.simple_hull();

  // Restrict the number of parameters involved in the access as the lexmin/
  // lexmax computation will take too long if this number is high.
  //
  // Experiments with a simple test case using an i7 4800MQ:
  //
  //  #Parameters involved | Time (in sec)
  //            6          |     0.01
  //            7          |     0.04
  //            8          |     0.12
  //            9          |     0.40
  //           10          |     1.54
  //           11          |     6.78
  //           12          |    30.38
  //
  if (isl_set_n_param(Set.get()) > RunTimeChecksMaxParameters) {
    unsigned InvolvedParams = 0;
    for (unsigned u = 0, e = isl_set_n_param(Set.get()); u < e; u++)
      if (Set.involves_dims(isl::dim::param, u, 1))
        InvolvedParams++;

    if (InvolvedParams > RunTimeChecksMaxParameters)
      return isl::stat::error;
  }

  MinPMA = Set.lexmin_pw_multi_aff();
  MaxPMA = Set.lexmax_pw_multi_aff();

  MinPMA = MinPMA.coalesce();
  MaxPMA = MaxPMA.coalesce();

  // Adjust the last dimension of the maximal access by one as we want to
  // enclose the accessed memory region by MinPMA and MaxPMA. The pointer
  // we test during code generation might now point after the end of the
  // allocated array but we will never dereference it anyway.
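  // E.g., if the set of accessed elements is A[0] ... A[99], the resulting
  // bounds are MinPMA = A[0] and MaxPMA = A[100], one element past the last
  // accessed one (illustrative example).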
  assert((!MaxPMA || MaxPMA.dim(isl::dim::out)) &&
         "Assumed at least one output dimension");

  Pos = MaxPMA.dim(isl::dim::out) - 1;
  LastDimAff = MaxPMA.get_pw_aff(Pos);
  OneAff = isl::aff(isl::local_space(LastDimAff.get_domain_space()));
  OneAff = OneAff.add_constant_si(1);
  LastDimAff = LastDimAff.add(OneAff);
  MaxPMA = MaxPMA.set_pw_aff(Pos, LastDimAff);

  if (!MinPMA || !MaxPMA)
    return isl::stat::error;

  MinMaxAccesses.push_back(std::make_pair(MinPMA, MaxPMA));

  return isl::stat::ok;
}

static __isl_give isl_set *getAccessDomain(MemoryAccess *MA) {
  isl_set *Domain = MA->getStatement()->getDomain().release();
  Domain = isl_set_project_out(Domain, isl_dim_set, 0, isl_set_n_dim(Domain));
  return isl_set_reset_tuple_id(Domain);
}

/// Wrapper function to calculate minimal/maximal accesses to each array.
static bool calculateMinMaxAccess(Scop::AliasGroupTy AliasGroup, Scop &S,
                                  Scop::MinMaxVectorTy &MinMaxAccesses) {
  MinMaxAccesses.reserve(AliasGroup.size());

  isl::union_set Domains = S.getDomains();
  isl::union_map Accesses = isl::union_map::empty(S.getParamSpace());

  for (MemoryAccess *MA : AliasGroup)
    Accesses = Accesses.add_map(MA->getAccessRelation());

  Accesses = Accesses.intersect_domain(Domains);
  isl::union_set Locations = Accesses.range();

  auto Lambda = [&MinMaxAccesses, &S](isl::set Set) -> isl::stat {
    return buildMinMaxAccess(Set, MinMaxAccesses, S);
  };
  return Locations.foreach_set(Lambda) == isl::stat::ok;
}

/// Helper to treat non-affine regions and basic blocks the same.
///
///{

/// Return the block that is the representing block for @p RN.
static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) {
  return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry()
                           : RN->getNodeAs<BasicBlock>();
}

/// Return the @p idx'th block that is executed after @p RN.
static inline BasicBlock *
getRegionNodeSuccessor(RegionNode *RN, TerminatorInst *TI, unsigned idx) {
  if (RN->isSubRegion()) {
    assert(idx == 0);
    return RN->getNodeAs<Region>()->getExit();
  }
  return TI->getSuccessor(idx);
}

/// Return the smallest loop surrounding @p RN.
static inline Loop *getRegionNodeLoop(RegionNode *RN, LoopInfo &LI) {
  if (!RN->isSubRegion()) {
    BasicBlock *BB = RN->getNodeAs<BasicBlock>();
    Loop *L = LI.getLoopFor(BB);

    // Unreachable statements are not considered to belong to a LLVM loop, as
    // they are not part of an actual loop in the control flow graph.
    // Nevertheless, we handle certain unreachable statements that are common
    // when modeling run-time bounds checks as being part of the loop to be
    // able to model them and to later eliminate the run-time bounds checks.
    //
    // Specifically, for basic blocks that terminate in an unreachable and
    // where the immediate predecessor is part of a loop, we assume these
    // basic blocks belong to the loop the predecessor belongs to. This
    // allows us to model the following code.
    //
    // for (i = 0; i < N; i++) {
    //   if (i > 1024)
    //     abort();            <- this abort might be translated to an
    //                            unreachable
    //
    //   A[i] = ...
    // }
    if (!L && isa<UnreachableInst>(BB->getTerminator()) && BB->getPrevNode())
      L = LI.getLoopFor(BB->getPrevNode());
    return L;
  }

  Region *NonAffineSubRegion = RN->getNodeAs<Region>();
  Loop *L = LI.getLoopFor(NonAffineSubRegion->getEntry());
  while (L && NonAffineSubRegion->contains(L))
    L = L->getParentLoop();
  return L;
}

/// Get the number of blocks in @p L.
///
/// The number of blocks in a loop are the number of basic blocks actually
/// belonging to the loop, as well as all single basic blocks that the loop
/// exits to and which terminate in an unreachable instruction. We do not
/// allow such basic blocks in the exit of a scop, hence they belong to the
/// scop and represent run-time conditions which we want to model and
/// subsequently speculate away.
///
/// @see getRegionNodeLoop for additional details.
unsigned getNumBlocksInLoop(Loop *L) {
  unsigned NumBlocks = L->getNumBlocks();
  SmallVector<BasicBlock *, 4> ExitBlocks;
  L->getExitBlocks(ExitBlocks);

  for (auto ExitBlock : ExitBlocks) {
    if (isa<UnreachableInst>(ExitBlock->getTerminator()))
      NumBlocks++;
  }
  return NumBlocks;
}

static inline unsigned getNumBlocksInRegionNode(RegionNode *RN) {
  if (!RN->isSubRegion())
    return 1;

  Region *R = RN->getNodeAs<Region>();
  return std::distance(R->block_begin(), R->block_end());
}

static bool containsErrorBlock(RegionNode *RN, const Region &R, LoopInfo &LI,
                               const DominatorTree &DT) {
  if (!RN->isSubRegion())
    return isErrorBlock(*RN->getNodeAs<BasicBlock>(), R, LI, DT);
  for (BasicBlock *BB : RN->getNodeAs<Region>()->blocks())
    if (isErrorBlock(*BB, R, LI, DT))
      return true;
  return false;
}

///}

isl::set Scop::getDomainConditions(const ScopStmt *Stmt) const {
  return getDomainConditions(Stmt->getEntryBlock());
}

isl::set Scop::getDomainConditions(BasicBlock *BB) const {
  auto DIt = DomainMap.find(BB);
  if (DIt != DomainMap.end())
    return DIt->getSecond();

  auto &RI = *R.getRegionInfo();
  auto *BBR = RI.getRegionFor(BB);
  while (BBR->getEntry() == BB)
    BBR = BBR->getParent();
  return getDomainConditions(BBR->getEntry());
}

bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI,
                        DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  bool IsOnlyNonAffineRegion = isNonAffineSubRegion(R);
  auto *EntryBB = R->getEntry();
  auto *L = IsOnlyNonAffineRegion ? nullptr : LI.getLoopFor(EntryBB);
  int LD = getRelativeLoopDepth(L);
  auto *S =
      isl_set_universe(isl_space_set_alloc(getIslCtx().get(), 0, LD + 1));

  while (LD-- >= 0) {
    L = L->getParentLoop();
  }

  InvalidDomainMap[EntryBB] = isl::manage(isl_set_empty(isl_set_get_space(S)));
  DomainMap[EntryBB] = isl::manage(S);

  if (IsOnlyNonAffineRegion)
    return !containsErrorBlock(R->getNode(), *R, LI, DT);

  if (!buildDomainsWithBranchConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  if (!propagateDomainConstraints(R, DT, LI, InvalidDomainMap))
    return false;

  // Error blocks and blocks dominated by them have been assumed to never be
  // executed. Representing them in the Scop does not add any value. In fact,
  // it is likely to cause issues during construction of the ScopStmts. The
  // contents of error blocks have not been verified to be expressible and
  // will cause problems when building up a ScopStmt for them.
  // Furthermore, basic blocks dominated by error blocks may reference
  // instructions in the error block which, if the error block is not modeled,
  // can themselves not be constructed properly. To this end we will replace
  // the domains of error blocks and those only reachable via error blocks
  // with an empty set. Additionally, we will record for each block under
  // which parameter combination it would be reached via an error block in
  // its InvalidDomain. This information is needed during load hoisting.
  if (!propagateInvalidStmtDomains(R, DT, LI, InvalidDomainMap))
    return false;

  return true;
}

/// Adjust the dimensions of @p Dom that was constructed for @p OldL
/// to be compatible with domains constructed for loop @p NewL.
///
/// This function assumes @p NewL and @p OldL are equal or there is a CFG
/// edge from @p OldL to @p NewL.
static isl::set adjustDomainDimensions(Scop &S, isl::set Dom, Loop *OldL,
                                       Loop *NewL) {
  // If the loops are the same there is nothing to do.
  if (NewL == OldL)
    return Dom;

  int OldDepth = S.getRelativeLoopDepth(OldL);
  int NewDepth = S.getRelativeLoopDepth(NewL);
  // If both loops are non-affine loops there is nothing to do.
  if (OldDepth == -1 && NewDepth == -1)
    return Dom;

  // Distinguish three cases:
  //   1) The depth is the same but the loops are not.
  //      => One loop was left one was entered.
  //   2) The depth increased from OldL to NewL.
  //      => One loop was entered, none was left.
  //   3) The depth decreased from OldL to NewL.
  //      => Loops were left; the difference of the depths defines how many.
  if (OldDepth == NewDepth) {
    assert(OldL->getParentLoop() == NewL->getParentLoop());
    Dom = Dom.project_out(isl::dim::set, NewDepth, 1);
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else if (OldDepth < NewDepth) {
    assert(OldDepth + 1 == NewDepth);
    auto &R = S.getRegion();
    (void)R;
    assert(NewL->getParentLoop() == OldL ||
           ((!OldL || !R.contains(OldL)) && R.contains(NewL)));
    Dom = Dom.add_dims(isl::dim::set, 1);
  } else {
    assert(OldDepth > NewDepth);
    int Diff = OldDepth - NewDepth;
    int NumDim = Dom.n_dim();
    assert(NumDim >= Diff);
    Dom = Dom.project_out(isl::dim::set, NumDim - Diff, Diff);
  }

  return Dom;
}

bool Scop::propagateInvalidStmtDomains(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        propagateInvalidStmtDomains(SubRegion, DT, LI, InvalidDomainMap);
        continue;
      }
    }

    bool ContainsErrorBlock = containsErrorBlock(RN, getRegion(), LI, DT);
    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain && "Cannot propagate a nullptr");

    isl::set InvalidDomain = InvalidDomainMap[BB];

    bool IsInvalidBlock = ContainsErrorBlock || Domain.is_subset(InvalidDomain);

    if (!IsInvalidBlock) {
      InvalidDomain = InvalidDomain.intersect(Domain);
    } else {
      InvalidDomain = Domain;
      isl::set DomPar = Domain.params();
      recordAssumption(ERRORBLOCK, DomPar,
                       BB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
      Domain = nullptr;
    }

    if (InvalidDomain.is_empty()) {
      InvalidDomainMap[BB] = InvalidDomain;
      continue;
    }

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    auto *TI = BB->getTerminator();
    unsigned NumSuccs = RN->isSubRegion() ? 1 : TI->getNumSuccessors();
    for (unsigned u = 0; u < NumSuccs; u++) {
      auto *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip successors outside the SCoP.
      if (!contains(SuccBB))
        continue;

      // Skip backedges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      auto AdjustedInvalidDomain =
          adjustDomainDimensions(*this, InvalidDomain, BBLoop, SuccBBLoop);

      isl::set SuccInvalidDomain = InvalidDomainMap[SuccBB];
      SuccInvalidDomain = SuccInvalidDomain.unite(AdjustedInvalidDomain);
      SuccInvalidDomain = SuccInvalidDomain.coalesce();
      unsigned NumConjuncts = SuccInvalidDomain.n_basic_set();

      InvalidDomainMap[SuccBB] = SuccInvalidDomain;

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will bail.
      if (NumConjuncts < MaxDisjunctsInDomain)
        continue;

      InvalidDomainMap.erase(BB);
      invalidate(COMPLEXITY, TI->getDebugLoc(), TI->getParent());
      return false;
    }

    InvalidDomainMap[BB] = InvalidDomain;
  }

  return true;
}

void Scop::propagateDomainConstraintsToRegionExit(
    BasicBlock *BB, Loop *BBLoop,
    SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Check if the block @p BB is the entry of a region. If so we propagate its
  // domain to the exit block of the region. Otherwise we are done.
  auto *RI = R.getRegionInfo();
  auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr;
  auto *ExitBB = BBReg ? BBReg->getExit() : nullptr;
  if (!BBReg || BBReg->getEntry() != BB || !contains(ExitBB))
    return;

  // Do not propagate the domain if there is a loop backedge inside the region
  // that would prevent the exit block from being executed.
  auto *L = BBLoop;
  while (L && contains(L)) {
    SmallVector<BasicBlock *, 4> LatchBBs;
    BBLoop->getLoopLatches(LatchBBs);
    for (auto *LatchBB : LatchBBs)
      if (BB != LatchBB && BBReg->contains(LatchBB))
        return;
    L = L->getParentLoop();
  }

  isl::set Domain = DomainMap[BB];
  assert(Domain && "Cannot propagate a nullptr");

  Loop *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, getBoxedLoops());

  // Since the dimensions of @p BB and @p ExitBB might be different we have to
  // adjust the domain before we can propagate it.
  isl::set AdjustedDomain =
      adjustDomainDimensions(*this, Domain, BBLoop, ExitBBLoop);
  isl::set &ExitDomain = DomainMap[ExitBB];

  // If the exit domain is not yet created we set it otherwise we "add" the
  // current domain.
  ExitDomain = ExitDomain ? AdjustedDomain.unite(ExitDomain) : AdjustedDomain;

  // Initialize the invalid domain.
  InvalidDomainMap[ExitBB] = ExitDomain.empty(ExitDomain.get_space());

  FinishedExitBlocks.insert(ExitBB);
}

bool Scop::buildDomainsWithBranchConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // To create the domain for each block in R we iterate over all blocks and
  // subregions in R and propagate the conditions under which the current
  // region element is executed. To this end we iterate in reverse post order
  // over R as it ensures that we first visit all predecessors of a region
  // node (either a basic block or a subregion) before we visit the region
  // node itself. Initially, only the domain for the SCoP region entry block
  // is set and from there we propagate the current domain to all successors,
  // however we add the condition that the successor is actually executed
  // next.
  // As we are only interested in non-loop carried constraints here we can
  // simply skip loop back edges.

  SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks;
  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!buildDomainsWithBranchConstraints(SubRegion, DT, LI,
                                               InvalidDomainMap))
          return false;
        continue;
      }
    }

    if (containsErrorBlock(RN, getRegion(), LI, DT))
      HasErrorBlock = true;

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    TerminatorInst *TI = BB->getTerminator();

    if (isa<UnreachableInst>(TI))
      continue;

    isl::set Domain = DomainMap.lookup(BB);
    if (!Domain)
      continue;
    MaxLoopDepth = std::max(MaxLoopDepth, isl_set_n_dim(Domain.get()));

    auto *BBLoop = getRegionNodeLoop(RN, LI);
    // Propagate the domain from BB directly to blocks that have a superset
    // domain, at the moment only region exit nodes of regions that start in
    // BB.
    propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, LI,
                                           InvalidDomainMap);

    // If all successors of BB have been set a domain through the propagation
    // above we do not need to build condition sets but can just skip this
    // block. However, it is important to note that this is a local property
    // with regards to the region @p R. To this end FinishedExitBlocks is a
    // local variable.
    auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) {
      return FinishedExitBlocks.count(SuccBB);
    };
    if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit))
      continue;

    // Build the condition sets for the successor nodes of the current region
    // node. If it is a non-affine subregion we will always execute the single
    // exit node, hence the single entry node domain is the condition set. For
    // basic blocks we use the helper function buildConditionSets.
    SmallVector<isl_set *, 8> ConditionSets;
    if (RN->isSubRegion())
      ConditionSets.push_back(Domain.copy());
    else if (!buildConditionSets(*this, BB, TI, BBLoop, Domain.get(),
                                 InvalidDomainMap, ConditionSets))
      return false;

    // Now iterate over the successors and set their initial domain based on
    // their condition set. We skip back edges here and have to be careful
    // when we leave a loop not to keep constraints over a dimension that
    // doesn't exist anymore.
    assert(RN->isSubRegion() || TI->getNumSuccessors() == ConditionSets.size());
    for (unsigned u = 0, e = ConditionSets.size(); u < e; u++) {
      isl::set CondSet = isl::manage(ConditionSets[u]);
      BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u);

      // Skip blocks outside the region.
      if (!contains(SuccBB))
        continue;

      // If we propagate the domain of some block to "SuccBB" we do not have
      // to adjust the domain.
      if (FinishedExitBlocks.count(SuccBB))
        continue;

      // Skip back edges.
      if (DT.dominates(SuccBB, BB))
        continue;

      Loop *SuccBBLoop = getFirstNonBoxedLoopFor(SuccBB, LI, getBoxedLoops());

      CondSet = adjustDomainDimensions(*this, CondSet, BBLoop, SuccBBLoop);

      // Set the domain for the successor or merge it with an existing domain
      // in case there are multiple paths (without loop back edges) to the
      // successor block.
      isl::set &SuccDomain = DomainMap[SuccBB];

      if (SuccDomain) {
        SuccDomain = SuccDomain.unite(CondSet).coalesce();
      } else {
        // Initialize the invalid domain.
        InvalidDomainMap[SuccBB] = CondSet.empty(CondSet.get_space());
        SuccDomain = CondSet;
      }

      SuccDomain = SuccDomain.detect_equalities();

      // Check if the maximal number of domain disjunctions was reached.
      // In case this happens we will clean up and bail.
      if (SuccDomain.n_basic_set() < MaxDisjunctsInDomain)
        continue;

      invalidate(COMPLEXITY, DebugLoc());
      while (++u < ConditionSets.size())
        isl_set_free(ConditionSets[u]);
      return false;
    }
  }

  return true;
}

isl::set Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl::set Domain,
                                               DominatorTree &DT,
                                               LoopInfo &LI) {
  // If @p BB is the ScopEntry we are done.
  if (R.getEntry() == BB)
    return isl::set::universe(Domain.get_space());

  // The region info of this function.
  auto &RI = *R.getRegionInfo();

  Loop *BBLoop = getFirstNonBoxedLoopFor(BB, LI, getBoxedLoops());

  // A domain to collect all predecessor domains, thus all conditions under
  // which the block is executed. To this end we start with the empty domain.
  isl::set PredDom = isl::set::empty(Domain.get_space());

  // Set of regions of which the entry block domain has been propagated to BB.
  // All predecessors inside any of the regions can be skipped.
  SmallSet<Region *, 8> PropagatedRegions;

  for (auto *PredBB : predecessors(BB)) {
    // Skip backedges.
    if (DT.dominates(BB, PredBB))
      continue;

    // If the predecessor is in a region we used for propagation we can skip
    // it.
    auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); };
    if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(),
                    PredBBInRegion)) {
      continue;
    }

    // Check if there is a valid region we can use for propagation, thus look
    // for a region that contains the predecessor and has @p BB as exit block.
    auto *PredR = RI.getRegionFor(PredBB);
    while (PredR->getExit() != BB && !PredR->contains(BB))
      PredR = PredR->getParent();

    // If a valid region for propagation was found use the entry of that
    // region for propagation, otherwise the PredBB directly.
    if (PredR->getExit() == BB) {
      PredBB = PredR->getEntry();
      PropagatedRegions.insert(PredR);
    }

    isl::set PredBBDom = getDomainConditions(PredBB);
    Loop *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, getBoxedLoops());
    PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop);
    PredDom = PredDom.unite(PredBBDom);
  }

  return PredDom;
}

bool Scop::propagateDomainConstraints(
    Region *R, DominatorTree &DT, LoopInfo &LI,
    DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  // Iterate over the region R and propagate the domain constraints from the
  // predecessors to the current node. In contrast to the
  // buildDomainsWithBranchConstraints function, this one will pull the domain
  // information from the predecessors instead of pushing it to the
  // successors. Additionally, we assume the domains to be already present in
  // the domain map here. However, we iterate again in reverse post order so
  // we know all predecessors have been visited before a block or non-affine
  // subregion is visited.

  ReversePostOrderTraversal<Region *> RTraversal(R);
  for (auto *RN : RTraversal) {
    // Recurse for affine subregions but go on for basic blocks and non-affine
    // subregions.
    if (RN->isSubRegion()) {
      Region *SubRegion = RN->getNodeAs<Region>();
      if (!isNonAffineSubRegion(SubRegion)) {
        if (!propagateDomainConstraints(SubRegion, DT, LI, InvalidDomainMap))
          return false;
        continue;
      }
    }

    BasicBlock *BB = getRegionNodeBasicBlock(RN);
    isl::set &Domain = DomainMap[BB];
    assert(Domain);

    // Under the union of all predecessor conditions we can reach this block.
    isl::set PredDom = getPredecessorDomainConstraints(BB, Domain, DT, LI);
    Domain = Domain.intersect(PredDom).coalesce();
    Domain = Domain.align_params(getParamSpace());

    Loop *BBLoop = getRegionNodeLoop(RN, LI);
    if (BBLoop && BBLoop->getHeader() == BB && contains(BBLoop))
      if (!addLoopBoundsToHeaderDomain(BBLoop, LI, InvalidDomainMap))
        return false;
  }

  return true;
}

/// Create a map to map from a given iteration to a subsequent iteration.
///
/// This map maps from SetSpace -> SetSpace where the dimension @p Dim
/// is incremented by one and all other dimensions are equal, e.g.,
///             [i0, i1, i2, i3] -> [i0, i1, i2 + 1, i3]
///
/// if @p Dim is 2 and @p SetSpace has 4 dimensions.
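///
/// As a reading aid: the equality constraint added below encodes
/// o_Dim = i_Dim + 1 in isl's constraint form, i.e.
/// 1 + 1 * i_Dim + (-1) * o_Dim = 0, derived from the constant and the two
/// coefficients set on the constraint.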
static isl::map createNextIterationMap(isl::space SetSpace, unsigned Dim) {
  isl::space MapSpace = SetSpace.map_from_set();
  isl::map NextIterationMap = isl::map::universe(MapSpace);
  for (unsigned u = 0; u < NextIterationMap.dim(isl::dim::in); u++)
    if (u != Dim)
      NextIterationMap =
          NextIterationMap.equate(isl::dim::in, u, isl::dim::out, u);
  isl::constraint C =
      isl::constraint::alloc_equality(isl::local_space(MapSpace));
  C = C.set_constant_si(1);
  C = C.set_coefficient_si(isl::dim::in, Dim, 1);
  C = C.set_coefficient_si(isl::dim::out, Dim, -1);
  NextIterationMap = NextIterationMap.add_constraint(C);
  return NextIterationMap;
}

bool Scop::addLoopBoundsToHeaderDomain(
    Loop *L, LoopInfo &LI, DenseMap<BasicBlock *, isl::set> &InvalidDomainMap) {
  int LoopDepth = getRelativeLoopDepth(L);
  assert(LoopDepth >= 0 && "Loop in region should have at least depth one");

  BasicBlock *HeaderBB = L->getHeader();
  assert(DomainMap.count(HeaderBB));
  isl::set &HeaderBBDom = DomainMap[HeaderBB];

  isl::map NextIterationMap =
      createNextIterationMap(HeaderBBDom.get_space(), LoopDepth);

  isl::set UnionBackedgeCondition = HeaderBBDom.empty(HeaderBBDom.get_space());

  SmallVector<BasicBlock *, 4> LatchBlocks;
  L->getLoopLatches(LatchBlocks);

  for (BasicBlock *LatchBB : LatchBlocks) {
    // If the latch is only reachable via error statements we skip it.
    isl::set LatchBBDom = DomainMap.lookup(LatchBB);
    if (!LatchBBDom)
      continue;

    isl::set BackedgeCondition = nullptr;

    TerminatorInst *TI = LatchBB->getTerminator();
    BranchInst *BI = dyn_cast<BranchInst>(TI);
    assert(BI && "Only branch instructions allowed in loop latches");

    if (BI->isUnconditional())
      BackedgeCondition = LatchBBDom;
    else {
      SmallVector<isl_set *, 8> ConditionSets;
      int idx = BI->getSuccessor(0) != HeaderBB;
      if (!buildConditionSets(*this, LatchBB, TI, L, LatchBBDom.get(),
                              InvalidDomainMap, ConditionSets))
        return false;

      // Free the non back edge condition set as we do not need it.
      isl_set_free(ConditionSets[1 - idx]);

      BackedgeCondition = isl::manage(ConditionSets[idx]);
    }

    int LatchLoopDepth = getRelativeLoopDepth(LI.getLoopFor(LatchBB));
    assert(LatchLoopDepth >= LoopDepth);
    BackedgeCondition = BackedgeCondition.project_out(
        isl::dim::set, LoopDepth + 1, LatchLoopDepth - LoopDepth);
    UnionBackedgeCondition = UnionBackedgeCondition.unite(BackedgeCondition);
  }

  isl::map ForwardMap = isl::map::lex_le(HeaderBBDom.get_space());
  for (int i = 0; i < LoopDepth; i++)
    ForwardMap = ForwardMap.equate(isl::dim::in, i, isl::dim::out, i);

  isl::set UnionBackedgeConditionComplement =
      UnionBackedgeCondition.complement();
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.lower_bound_si(isl::dim::set, LoopDepth,
                                                      0);
  UnionBackedgeConditionComplement =
      UnionBackedgeConditionComplement.apply(ForwardMap);
  HeaderBBDom = HeaderBBDom.subtract(UnionBackedgeConditionComplement);
  HeaderBBDom = HeaderBBDom.apply(NextIterationMap);

  auto Parts = partitionSetParts(HeaderBBDom, LoopDepth);
  HeaderBBDom = Parts.second;

  // Check if there is a tagged AddRec for this loop and if so do not add
  // the bounded assumptions to the context as they are already implied by the
  // tag.
  if (Affinator.hasNSWAddRecForLoop(L))
    return true;

  isl::set UnboundedCtx = Parts.first.params();
  recordAssumption(INFINITELOOP, UnboundedCtx,
                   HeaderBB->getTerminator()->getDebugLoc(), AS_RESTRICTION);
  return true;
}

MemoryAccess *Scop::lookupBasePtrAccess(MemoryAccess *MA) {
  Value *PointerBase = MA->getOriginalBaseAddr();

  auto *PointerBaseInst = dyn_cast<Instruction>(PointerBase);
  if (!PointerBaseInst)
    return nullptr;

  auto *BasePtrStmt = getStmtFor(PointerBaseInst);
  if (!BasePtrStmt)
    return nullptr;

  return BasePtrStmt->getArrayAccessOrNULLFor(PointerBaseInst);
}

bool Scop::hasNonHoistableBasePtrInScop(MemoryAccess *MA,
                                        isl::union_map Writes) {
  if (auto *BasePtrMA = lookupBasePtrAccess(MA)) {
    return getNonHoistableCtx(BasePtrMA, Writes).is_null();
  }

  Value *BaseAddr = MA->getOriginalBaseAddr();
  if (auto *BasePtrInst = dyn_cast<Instruction>(BaseAddr))
    if (!isa<LoadInst>(BasePtrInst))
      return contains(BasePtrInst);

  return false;
}

bool Scop::buildAliasChecks(AliasAnalysis &AA) {
  if (!PollyUseRuntimeAliasChecks)
    return true;

  if (buildAliasGroups(AA)) {
    // Aliasing assumptions do not go through addAssumption but we still want
    // to collect statistics so we do it here explicitly.
    if (MinMaxAliasGroups.size())
      AssumptionsAliasing++;
    return true;
  }

  // If a problem occurs while building the alias groups we need to delete
  // this SCoP and pretend it wasn't valid in the first place. To this end
  // we make the assumed context infeasible.
  invalidate(ALIASING, DebugLoc());

  LLVM_DEBUG(
      dbgs() << "\n\nNOTE: Run time checks for " << getNameStr()
             << " could not be created as the number of parameters involved "
                "is too high. The SCoP will be "
                "dismissed.\nUse:\n\t--polly-rtc-max-parameters=X\nto adjust "
                "the maximal number of parameters but be advised that the "
                "compile time might increase exponentially.\n\n");
  return false;
}

std::tuple<Scop::AliasGroupVectorTy, DenseSet<const ScopArrayInfo *>>
Scop::buildAliasGroupsForAccesses(AliasAnalysis &AA) {
  AliasSetTracker AST(AA);

  DenseMap<Value *, MemoryAccess *> PtrToAcc;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;
  for (ScopStmt &Stmt : *this) {
    isl::set StmtDomain = Stmt.getDomain();
    bool StmtDomainEmpty = StmtDomain.is_empty();

    // Statements with an empty domain will never be executed.
    if (StmtDomainEmpty)
      continue;

    for (MemoryAccess *MA : Stmt) {
      if (MA->isScalarKind())
        continue;
      if (!MA->isRead())
        HasWriteAccess.insert(MA->getScopArrayInfo());
      MemAccInst Acc(MA->getAccessInstruction());
      if (MA->isRead() && isa<MemTransferInst>(Acc))
        PtrToAcc[cast<MemTransferInst>(Acc)->getRawSource()] = MA;
      else
        PtrToAcc[Acc.getPointerOperand()] = MA;
      AST.add(Acc);
    }
  }

  AliasGroupVectorTy AliasGroups;
  for (AliasSet &AS : AST) {
    if (AS.isMustAlias() || AS.isForwardingAliasSet())
      continue;
    AliasGroupTy AG;
    for (auto &PR : AS)
      AG.push_back(PtrToAcc[PR.getValue()]);
    if (AG.size() < 2)
      continue;
    AliasGroups.push_back(std::move(AG));
  }

  return std::make_tuple(AliasGroups, HasWriteAccess);
}

void Scop::splitAliasGroupsByDomain(AliasGroupVectorTy &AliasGroups) {
  for (unsigned u = 0; u < AliasGroups.size(); u++) {
    AliasGroupTy NewAG;
    AliasGroupTy &AG = AliasGroups[u];
    AliasGroupTy::iterator AGI = AG.begin();
    isl_set *AGDomain = getAccessDomain(*AGI);
    while (AGI != AG.end()) {
      MemoryAccess *MA = *AGI;
      isl_set *MADomain = getAccessDomain(MA);
      if (isl_set_is_disjoint(AGDomain, MADomain)) {
        NewAG.push_back(MA);
        AGI = AG.erase(AGI);
        isl_set_free(MADomain);
      } else {
        AGDomain = isl_set_union(AGDomain, MADomain);
        AGI++;
      }
    }
    if (NewAG.size() > 1)
      AliasGroups.push_back(std::move(NewAG));
    isl_set_free(AGDomain);
  }
}

bool Scop::buildAliasGroups(AliasAnalysis &AA) {
  // To create sound alias checks we perform the following steps:
  //   o) We partition each group into read only and non read only accesses.
  //   o) For each group with more than one base pointer we then compute
  //      minimal and maximal accesses to each array of a group in read only
  //      and non read only partitions separately.
  AliasGroupVectorTy AliasGroups;
  DenseSet<const ScopArrayInfo *> HasWriteAccess;

  std::tie(AliasGroups, HasWriteAccess) = buildAliasGroupsForAccesses(AA);

  splitAliasGroupsByDomain(AliasGroups);

  for (AliasGroupTy &AG : AliasGroups) {
    if (!hasFeasibleRuntimeContext())
      return false;

    {
      IslMaxOperationsGuard MaxOpGuard(getIslCtx().get(), OptComputeOut);
      bool Valid = buildAliasGroup(AG, HasWriteAccess);
      if (!Valid)
        return false;
    }
    if (isl_ctx_last_error(getIslCtx().get()) == isl_error_quota) {
      invalidate(COMPLEXITY, DebugLoc());
      return false;
    }
  }

  return true;
}

bool Scop::buildAliasGroup(Scop::AliasGroupTy &AliasGroup,
                           DenseSet<const ScopArrayInfo *> HasWriteAccess) {
  AliasGroupTy ReadOnlyAccesses;
  AliasGroupTy ReadWriteAccesses;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadWriteArrays;
  SmallPtrSet<const ScopArrayInfo *, 4> ReadOnlyArrays;

  if (AliasGroup.size() < 2)
    return true;

  for (MemoryAccess *Access : AliasGroup) {
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "PossibleAlias",
                                        Access->getAccessInstruction())
             << "Possibly aliasing pointer, use restrict keyword.");
    const ScopArrayInfo *Array = Access->getScopArrayInfo();
    if (HasWriteAccess.count(Array)) {
      ReadWriteArrays.insert(Array);
      ReadWriteAccesses.push_back(Access);
    } else {
      ReadOnlyArrays.insert(Array);
      ReadOnlyAccesses.push_back(Access);
    }
  }

  // If there are no read-only pointers, and less than two read-write
  // pointers, no alias check is needed.
  if (ReadOnlyAccesses.empty() && ReadWriteArrays.size() <= 1)
    return true;

  // If there is no read-write pointer, no alias check is needed.
  if (ReadWriteArrays.empty())
    return true;

  // For non-affine accesses, no alias check can be generated as we cannot
  // compute a sufficiently tight lower and upper bound: bail out.
  for (MemoryAccess *MA : AliasGroup) {
    if (!MA->isAffine()) {
      invalidate(ALIASING, MA->getAccessInstruction()->getDebugLoc(),
                 MA->getAccessInstruction()->getParent());
      return false;
    }
  }

  // Ensure that for all memory accesses for which we generate alias checks,
  // their base pointers are available.
  for (MemoryAccess *MA : AliasGroup) {
    if (MemoryAccess *BasePtrMA = lookupBasePtrAccess(MA))
      addRequiredInvariantLoad(
          cast<LoadInst>(BasePtrMA->getAccessInstruction()));
  }

  MinMaxAliasGroups.emplace_back();
  MinMaxVectorPairTy &pair = MinMaxAliasGroups.back();
  MinMaxVectorTy &MinMaxAccessesReadWrite = pair.first;
  MinMaxVectorTy &MinMaxAccessesReadOnly = pair.second;

  bool Valid;

  Valid =
      calculateMinMaxAccess(ReadWriteAccesses, *this, MinMaxAccessesReadWrite);

  if (!Valid)
    return false;

  // Bail out if the number of values we need to compare is too large.
  // This is important as the number of comparisons grows quadratically with
  // the number of values we need to compare.
  if (MinMaxAccessesReadWrite.size() + ReadOnlyArrays.size() >
      RunTimeChecksMaxArraysPerGroup)
    return false;

  Valid =
      calculateMinMaxAccess(ReadOnlyAccesses, *this, MinMaxAccessesReadOnly);

  if (!Valid)
    return false;

  return true;
}

/// Get the smallest loop that contains @p S but is not in @p S.
static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
  // Start with the smallest loop containing the entry and expand that
  // loop until it contains all blocks in the region. If there is a loop
  // containing all blocks in the region check if it is itself contained
  // and if so take the parent loop as it will be the smallest containing
  // the region but not contained by it.
  Loop *L = LI.getLoopFor(S.getEntry());
  while (L) {
    bool AllContained = true;
    for (auto *BB : S.blocks())
      AllContained &= L->contains(BB);
    if (AllContained)
      break;
    L = L->getParentLoop();
  }

  return L ? (S.contains(L) ? L->getParentLoop() : L) : nullptr;
}

int Scop::NextScopID = 0;

std::string Scop::CurrentFunc;

int Scop::getNextID(std::string ParentFunc) {
  if (ParentFunc != CurrentFunc) {
    CurrentFunc = ParentFunc;
    NextScopID = 0;
  }
  return NextScopID++;
}

Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
           DominatorTree &DT, ScopDetection::DetectionContext &DC,
           OptimizationRemarkEmitter &ORE)
    : IslCtx(isl_ctx_alloc(), isl_ctx_free), SE(&ScalarEvolution), DT(&DT),
      R(R), name(None), HasSingleExitEdge(R.getExitingBlock()), DC(DC),
      ORE(ORE), Affinator(this, LI),
      ID(getNextID((*R.getEntry()->getParent()).getName().str())) {
  if (IslOnErrorAbort)
    isl_options_set_on_error(getIslCtx().get(), ISL_ON_ERROR_ABORT);
  buildContext();
}

Scop::~Scop() = default;

void Scop::foldSizeConstantsToRight() {
  isl_union_set *Accessed = isl_union_map_range(getAccesses().release());

  for (auto Array : arrays()) {
    if (Array->getNumberOfDimensions() <= 1)
      continue;

    isl_space *Space = Array->getSpace().release();

    Space = isl_space_align_params(Space, isl_union_set_get_space(Accessed));

    if (!isl_union_set_contains(Accessed, Space)) {
      isl_space_free(Space);
      continue;
    }

    isl_set *Elements = isl_union_set_extract_set(Accessed, Space);

    isl_map *Transform =
        isl_map_universe(isl_space_map_from_set(Array->getSpace().release()));

    std::vector<int> Int;

    int Dims = isl_set_dim(Elements, isl_dim_set);
    for (int i = 0; i < Dims; i++) {
      isl_set *DimOnly =
          isl_set_project_out(isl_set_copy(Elements), isl_dim_set, 0, i);
      DimOnly = isl_set_project_out(DimOnly, isl_dim_set, 1, Dims - i - 1);
      DimOnly = isl_set_lower_bound_si(DimOnly, isl_dim_set, 0, 0);

      isl_basic_set *DimHull = isl_set_affine_hull(DimOnly);

      if (i == Dims - 1) {
        Int.push_back(1);
        Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
        isl_basic_set_free(DimHull);
        continue;
      }

      if (isl_basic_set_dim(DimHull, isl_dim_div) == 1) {
        isl_aff *Diff = isl_basic_set_get_div(DimHull, 0);
        isl_val *Val = isl_aff_get_denominator_val(Diff);
        isl_aff_free(Diff);

        int ValInt = 1;

        if (isl_val_is_int(Val)) {
          auto ValAPInt = APIntFromVal(Val);
          if (ValAPInt.isSignedIntN(32))
            ValInt = ValAPInt.getSExtValue();
        } else {
          isl_val_free(Val);
        }

        Int.push_back(ValInt);

        isl_constraint *C = isl_constraint_alloc_equality(
            isl_local_space_from_space(isl_map_get_space(Transform)));
        C = isl_constraint_set_coefficient_si(C, isl_dim_out, i, ValInt);
        C = isl_constraint_set_coefficient_si(C, isl_dim_in, i, -1);
        Transform = isl_map_add_constraint(Transform, C);
        isl_basic_set_free(DimHull);
        continue;
      }

      isl_basic_set *ZeroSet = isl_basic_set_copy(DimHull);
      ZeroSet = isl_basic_set_fix_si(ZeroSet, isl_dim_set, 0, 0);

      int ValInt = 1;
      if (isl_basic_set_is_equal(ZeroSet, DimHull)) {
        ValInt = 0;
      }

      Int.push_back(ValInt);
      Transform = isl_map_equate(Transform, isl_dim_in, i, isl_dim_out, i);
      isl_basic_set_free(DimHull);
      isl_basic_set_free(ZeroSet);
    }

    isl_set *MappedElements = isl_map_domain(isl_map_copy(Transform));

    if (!isl_set_is_subset(Elements, MappedElements)) {
      isl_set_free(Elements);
      isl_set_free(MappedElements);
      isl_map_free(Transform);
      continue;
    }

    isl_set_free(MappedElements);

    bool CanFold = true;

    if (Int[0] <= 1)
      CanFold = false;

    unsigned NumDims = Array->getNumberOfDimensions();
    for (unsigned i = 1; i < NumDims - 1; i++)
      if (Int[0] != Int[i] && Int[i])
        CanFold = false;

    if (!CanFold) {
      isl_set_free(Elements);
      isl_map_free(Transform);
      continue;
    }

    for (auto &Access : AccessFunctions)
      if (Access->getScopArrayInfo() == Array)
        Access->setAccessRelation(Access->getAccessRelation().apply_range(
            isl::manage_copy(Transform)));

    isl_map_free(Transform);

    std::vector<const SCEV *> Sizes;
    for (unsigned i = 0; i < NumDims; i++) {
      auto Size = Array->getDimensionSize(i);

      if (i == NumDims - 1)
        Size = SE->getMulExpr(Size, SE->getConstant(Size->getType(), Int[0]));
      Sizes.push_back(Size);
    }

    Array->updateSizes(Sizes, false /* CheckConsistency */);

    isl_set_free(Elements);
  }
  isl_union_set_free(Accessed);
}

void Scop::markFortranArrays() {
  for (ScopStmt &Stmt : Stmts) {
    for (MemoryAccess *MemAcc : Stmt) {
      Value *FAD = MemAcc->getFortranArrayDescriptor();
      if (!FAD)
        continue;

      // TODO: const_cast-ing to edit
      ScopArrayInfo *SAI =
          const_cast<ScopArrayInfo *>(MemAcc->getLatestScopArrayInfo());
      assert(SAI && "memory access into a Fortran array does not "
                    "have an associated ScopArrayInfo");
      SAI->applyAndSetFAD(FAD);
    }
  }
}

void Scop::finalizeAccesses() {
  updateAccessDimensionality();
  foldSizeConstantsToRight();
  foldAccessRelations();
  assumeNoOutOfBounds();
  markFortranArrays();
}

void Scop::updateAccessDimensionality() {
  // Check all array accesses for each base pointer and find a (virtual)
  // element size for the base pointer that divides all access functions.
  for (ScopStmt &Stmt : *this)
    for (MemoryAccess *Access : Stmt) {
      if (!Access->isArrayKind())
        continue;
      ScopArrayInfo *Array =
          const_cast<ScopArrayInfo *>(Access->getScopArrayInfo());

      if (Array->getNumberOfDimensions() != 1)
        continue;
      unsigned DivisibleSize = Array->getElemSizeInBytes();
      const SCEV *Subscript = Access->getSubscript(0);
      while (!isDivisible(Subscript, DivisibleSize, *SE))
        DivisibleSize /= 2;
      auto *Ty = IntegerType::get(SE->getContext(), DivisibleSize * 8);
      Array->updateElementType(Ty);
    }

  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->updateDimensionality();
}

void Scop::foldAccessRelations() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->foldAccessRelation();
}

void Scop::assumeNoOutOfBounds() {
  for (auto &Stmt : *this)
    for (auto &Access : Stmt)
      Access->assumeNoOutOfBound();
}

void Scop::removeFromStmtMap(ScopStmt &Stmt) {
  for (Instruction *Inst : Stmt.getInstructions())
    InstStmtMap.erase(Inst);

  if (Stmt.isRegionStmt()) {
    for (BasicBlock *BB : Stmt.getRegion()->blocks()) {
      StmtMap.erase(BB);
      // Skip entry basic block, as its instructions are already deleted as
      // part of the statement's instruction list.
      if (BB == Stmt.getEntryBlock())
        continue;
      for (Instruction &Inst : *BB)
        InstStmtMap.erase(&Inst);
    }
  } else {
    auto StmtMapIt = StmtMap.find(Stmt.getBasicBlock());
    if (StmtMapIt != StmtMap.end())
      StmtMapIt->second.erase(std::remove(StmtMapIt->second.begin(),
                                          StmtMapIt->second.end(), &Stmt),
                              StmtMapIt->second.end());
    for (Instruction *Inst : Stmt.getInstructions())
      InstStmtMap.erase(Inst);
  }
}

void Scop::removeStmts(std::function<bool(ScopStmt &)> ShouldDelete,
                       bool AfterHoisting) {
  for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) {
    if (!ShouldDelete(*StmtIt)) {
      StmtIt++;
      continue;
    }

    // Start with removing all of the statement's accesses including erasing
    // it from all maps that are pointing to them.
    // Make a temporary copy because removing MAs invalidates the iterator.
    SmallVector<MemoryAccess *, 16> MAList(StmtIt->begin(), StmtIt->end());
    for (MemoryAccess *MA : MAList)
      StmtIt->removeSingleMemoryAccess(MA, AfterHoisting);

    removeFromStmtMap(*StmtIt);
    StmtIt = Stmts.erase(StmtIt);
  }
}

void Scop::removeStmtNotInDomainMap() {
  auto ShouldDelete = [this](ScopStmt &Stmt) -> bool {
    return !this->DomainMap.lookup(Stmt.getEntryBlock());
  };
  removeStmts(ShouldDelete, false);
}

void Scop::simplifySCoP(bool AfterHoisting) {
  auto ShouldDelete = [AfterHoisting](ScopStmt &Stmt) -> bool {
    // Never delete statements that contain calls to debug functions.
    if (hasDebugCall(&Stmt))
      return false;

    bool RemoveStmt = Stmt.isEmpty();

    // Remove read only statements only after invariant load hoisting.
    if (!RemoveStmt && AfterHoisting) {
      bool OnlyRead = true;
      for (MemoryAccess *MA : Stmt) {
        if (MA->isRead())
          continue;

        OnlyRead = false;
        break;
      }

      RemoveStmt = OnlyRead;
    }
    return RemoveStmt;
  };

  removeStmts(ShouldDelete, AfterHoisting);
}

InvariantEquivClassTy *Scop::lookupInvariantEquivClass(Value *Val) {
  LoadInst *LInst = dyn_cast<LoadInst>(Val);
  if (!LInst)
    return nullptr;

  if (Value *Rep = InvEquivClassVMap.lookup(LInst))
    LInst = cast<LoadInst>(Rep);

  Type *Ty = LInst->getType();
  const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());
  for (auto &IAClass : InvariantEquivClasses) {
    if (PointerSCEV != IAClass.IdentifyingPointer || Ty != IAClass.AccessType)
      continue;

    auto &MAs = IAClass.InvariantAccesses;
    for (auto *MA : MAs)
      if (MA->getAccessInstruction() == Val)
        return &IAClass;
  }

  return nullptr;
}

bool isAParameter(llvm::Value *maybeParam, const Function &F) {
  for (const llvm::Argument &Arg : F.args())
    if (&Arg == maybeParam)
      return true;

  return false;
}

bool Scop::canAlwaysBeHoisted(MemoryAccess *MA, bool StmtInvalidCtxIsEmpty,
                              bool MAInvalidCtxIsEmpty,
                              bool NonHoistableCtxIsEmpty) {
  LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
  const DataLayout &DL = LInst->getParent()->getModule()->getDataLayout();
  if (PollyAllowDereferenceOfAllFunctionParams &&
      isAParameter(LInst->getPointerOperand(), getFunction()))
    return true;

  // TODO: We can provide more information for better but more expensive
  //       results.
  if (!isDereferenceableAndAlignedPointer(LInst->getPointerOperand(),
                                          LInst->getAlignment(), DL))
    return false;

  // If the location might be overwritten we do not hoist it unconditionally.
  //
  // TODO: This is probably too conservative.
  if (!NonHoistableCtxIsEmpty)
    return false;

  // If a dereferenceable load is in a statement that is modeled precisely we
  // can hoist it.
  if (StmtInvalidCtxIsEmpty && MAInvalidCtxIsEmpty)
    return true;

  // Even if the statement is not modeled precisely we can hoist the load if
  // it does not involve any parameters that might have been specialized by
  // the statement domain.
  for (unsigned u = 0, e = MA->getNumSubscripts(); u < e; u++)
    if (!isa<SCEVConstant>(MA->getSubscript(u)))
      return false;
  return true;
}

void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
  if (InvMAs.empty())
    return;

  isl::set StmtInvalidCtx = Stmt.getInvalidContext();
  bool StmtInvalidCtxIsEmpty = StmtInvalidCtx.is_empty();

  // Get the context under which the statement is executed but remove the
  // error context under which this statement is reached.
  isl::set DomainCtx = Stmt.getDomain().params();
  DomainCtx = DomainCtx.subtract(StmtInvalidCtx);

  if (DomainCtx.n_basic_set() >= MaxDisjunctsInDomain) {
    auto *AccInst = InvMAs.front().MA->getAccessInstruction();
    invalidate(COMPLEXITY, AccInst->getDebugLoc(), AccInst->getParent());
    return;
  }

  // Project out all parameters that relate to loads in the statement.
  // Project out all parameters that relate to loads in the statement.
  // Otherwise we could have cyclic dependences on the constraints under which
  // the hoisted loads are executed and we could not determine an order in
  // which to pre-load them. This happens because not only lower bounds are
  // part of the domain but also upper bounds.
  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    Instruction *AccInst = MA->getAccessInstruction();
    if (SE->isSCEVable(AccInst->getType())) {
      SetVector<Value *> Values;
      for (const SCEV *Parameter : Parameters) {
        Values.clear();
        findValues(Parameter, *SE, Values);
        if (!Values.count(AccInst))
          continue;

        if (isl::id ParamId = getIdForParam(Parameter)) {
          int Dim = DomainCtx.find_dim_by_id(isl::dim::param, ParamId);
          if (Dim >= 0)
            DomainCtx = DomainCtx.eliminate(isl::dim::param, Dim, 1);
        }
      }
    }
  }

  for (auto &InvMA : InvMAs) {
    auto *MA = InvMA.MA;
    isl::set NHCtx = InvMA.NonHoistableCtx;

    // Check for another invariant access that accesses the same location as
    // MA and if found consolidate them. Otherwise create a new equivalence
    // class at the end of InvariantEquivClasses.
    LoadInst *LInst = cast<LoadInst>(MA->getAccessInstruction());
    Type *Ty = LInst->getType();
    const SCEV *PointerSCEV = SE->getSCEV(LInst->getPointerOperand());

    isl::set MAInvalidCtx = MA->getInvalidContext();
    bool NonHoistableCtxIsEmpty = NHCtx.is_empty();
    bool MAInvalidCtxIsEmpty = MAInvalidCtx.is_empty();

    isl::set MACtx;
    // Check if we know that this pointer can be speculatively accessed.
    if (canAlwaysBeHoisted(MA, StmtInvalidCtxIsEmpty, MAInvalidCtxIsEmpty,
                           NonHoistableCtxIsEmpty)) {
      MACtx = isl::set::universe(DomainCtx.get_space());
    } else {
      MACtx = DomainCtx;
      MACtx = MACtx.subtract(MAInvalidCtx.unite(NHCtx));
      MACtx = MACtx.gist_params(getContext());
    }

    bool Consolidated = false;
    for (auto &IAClass : InvariantEquivClasses) {
      if (PointerSCEV != IAClass.IdentifyingPointer ||
          Ty != IAClass.AccessType)
        continue;

      // If the pointer and the type are equal, check if the access function
      // w.r.t. the domain is equal too. It can happen that the domain fixes
      // parameter values and these can be different for distinct parts of
      // the SCoP. If this happens we cannot consolidate the loads but need
      // to create a new invariant load equivalence class.
      auto &MAs = IAClass.InvariantAccesses;
      if (!MAs.empty()) {
        auto *LastMA = MAs.front();

        isl::set AR = MA->getAccessRelation().range();
        isl::set LastAR = LastMA->getAccessRelation().range();
        bool SameAR = AR.is_equal(LastAR);

        if (!SameAR)
          continue;
      }

      // Add MA to the list of accesses that are in this class.
      MAs.push_front(MA);

      Consolidated = true;

      // Unify the execution context of the class and this statement.
      isl::set IAClassDomainCtx = IAClass.ExecutionContext;
      if (IAClassDomainCtx)
        IAClassDomainCtx = IAClassDomainCtx.unite(MACtx).coalesce();
      else
        IAClassDomainCtx = MACtx;
      IAClass.ExecutionContext = IAClassDomainCtx;
      break;
    }

    if (Consolidated)
      continue;

    // If we did not consolidate MA, thus did not find an equivalence class
    // for it, we create a new one.
    InvariantEquivClasses.emplace_back(
        InvariantEquivClassTy{PointerSCEV, MemoryAccessList{MA}, MACtx, Ty});
  }
}
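// For illustration: two hoisted loads of the same location, e.g. loads of
// S->Ptr in two different statements, end up in one equivalence class whose
// execution context is the (coalesced) union of the contexts under which
// either load is executed. Distinct classes are kept when the access ranges
// differ, e.g. when the statement domains fix a parameter to different
// values.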
/// Check if an access range is too complex.
///
/// An access range is too complex if it contains either many disjuncts or
/// very complex expressions. As a simple heuristic, we consider a set to be
/// too complex if the sum of existentially quantified dimensions and set
/// dimensions is larger than a threshold. This reliably detects both sets
/// with many disjuncts as well as sets with many divisions as they arise in
/// h264.
///
/// @param AccessRange The range to check for complexity.
///
/// @returns True if the access range is too complex.
static bool isAccessRangeTooComplex(isl::set AccessRange) {
  unsigned NumTotalDims = 0;

  auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
    NumTotalDims += BSet.dim(isl::dim::div);
    NumTotalDims += BSet.dim(isl::dim::set);
    return isl::stat::ok;
  };

  AccessRange.foreach_basic_set(CountDimensions);

  if (NumTotalDims > MaxDimensionsInAccessRange)
    return true;

  return false;
}

isl::set Scop::getNonHoistableCtx(MemoryAccess *Access,
                                  isl::union_map Writes) {
  // TODO: Loads that are not loop carried, hence are in a statement with
  //       zero iterators, are by construction invariant, though we
  //       currently "hoist" them anyway. This is necessary because we allow
  //       them to be treated as parameters (e.g., in conditions) and our
  //       code generation would otherwise use the old value.

  auto &Stmt = *Access->getStatement();
  BasicBlock *BB = Stmt.getEntryBlock();

  if (Access->isScalarKind() || Access->isWrite() || !Access->isAffine() ||
      Access->isMemoryIntrinsic())
    return nullptr;

  // Skip accesses that have an invariant base pointer which is defined but
  // not loaded inside the SCoP. This can happen, e.g., if a readnone call
  // returns a pointer that is used as a base address. However, as we want
  // to hoist indirect pointers, we allow the base pointer to be defined in
  // the region if it is also a memory access. Each ScopArrayInfo object
  // that has a base pointer origin has a base pointer that is loaded and
  // that is invariant, thus it will be hoisted too. However, if there is
  // no base pointer origin we check that the base pointer is defined
  // outside the region.
  auto *LI = cast<LoadInst>(Access->getAccessInstruction());
  if (hasNonHoistableBasePtrInScop(Access, Writes))
    return nullptr;

  isl::map AccessRelation = Access->getAccessRelation();
  assert(!AccessRelation.is_empty());

  if (AccessRelation.involves_dims(isl::dim::in, 0, Stmt.getNumIterators()))
    return nullptr;

  AccessRelation = AccessRelation.intersect_domain(Stmt.getDomain());
  isl::set SafeToLoad;

  auto &DL = getFunction().getParent()->getDataLayout();

  if (isSafeToLoadUnconditionally(LI->getPointerOperand(),
                                  LI->getAlignment(), DL)) {
    SafeToLoad = isl::set::universe(AccessRelation.get_space().range());
  } else if (BB != LI->getParent()) {
    // Skip accesses in non-affine subregions as they might not be executed
    // under the same condition as the entry of the non-affine subregion.
    return nullptr;
  } else {
    SafeToLoad = AccessRelation.range();
  }

  if (isAccessRangeTooComplex(AccessRelation.range()))
    return nullptr;

  isl::union_map Written = Writes.intersect_range(SafeToLoad);
  isl::set WrittenCtx = Written.params();
  bool IsWritten = !WrittenCtx.is_empty();

  if (!IsWritten)
    return WrittenCtx;

  WrittenCtx = WrittenCtx.remove_divs();
  bool TooComplex = WrittenCtx.n_basic_set() >= MaxDisjunctsInDomain;
  if (TooComplex || !isRequiredInvariantLoad(LI))
    return nullptr;

  addAssumption(INVARIANTLOAD, WrittenCtx, LI->getDebugLoc(), AS_RESTRICTION,
                LI->getParent());

  return WrittenCtx;
}
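// Note on the return convention of getNonHoistableCtx: a null isl::set means
// the access must not be hoisted at all; an empty parameter set means the
// location is never written and the load can be hoisted unconditionally; a
// non-empty set describes the parameter constellations under which hoisting
// is invalid and is later subtracted from the execution context in
// addInvariantLoads.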
void Scop::verifyInvariantLoads() {
  auto &RIL = getRequiredInvariantLoads();
  for (LoadInst *LI : RIL) {
    assert(LI && contains(LI));
    // If there exists a statement in the scop which has a memory access for
    // @p LI, then mark this scop as infeasible for optimization.
    for (ScopStmt &Stmt : Stmts)
      if (Stmt.getArrayAccessOrNULLFor(LI)) {
        invalidate(INVARIANTLOAD, LI->getDebugLoc(), LI->getParent());
        return;
      }
  }
}

void Scop::hoistInvariantLoads() {
  if (!PollyInvariantLoadHoisting)
    return;

  isl::union_map Writes = getWrites();
  for (ScopStmt &Stmt : *this) {
    InvariantAccessesTy InvariantAccesses;

    for (MemoryAccess *Access : Stmt)
      if (isl::set NHCtx = getNonHoistableCtx(Access, Writes))
        InvariantAccesses.push_back({Access, NHCtx});

    // Transfer the memory accesses from the statement to the SCoP.
    for (auto InvMA : InvariantAccesses)
      Stmt.removeMemoryAccess(InvMA.MA);
    addInvariantLoads(Stmt, InvariantAccesses);
  }
}

/// Find the canonical scop array info object for a set of invariant load
/// hoisted loads. The canonical array is the one that corresponds to the
/// first load in the list of accesses which is used as base pointer of a
/// scop array.
static const ScopArrayInfo *findCanonicalArray(Scop *S,
                                               MemoryAccessList &Accesses) {
  for (MemoryAccess *Access : Accesses) {
    const ScopArrayInfo *CanonicalArray = S->getScopArrayInfoOrNull(
        Access->getAccessInstruction(), MemoryKind::Array);
    if (CanonicalArray)
      return CanonicalArray;
  }
  return nullptr;
}

/// Check if @p Array serves as base array in an invariant load.
static bool isUsedForIndirectHoistedLoad(Scop *S,
                                         const ScopArrayInfo *Array) {
  for (InvariantEquivClassTy &EqClass2 : S->getInvariantAccesses())
    for (MemoryAccess *Access2 : EqClass2.InvariantAccesses)
      if (Access2->getScopArrayInfo() == Array)
        return true;
  return false;
}

/// Replace the base pointer arrays in all memory accesses referencing @p Old,
/// with a reference to @p New.
static void replaceBasePtrArrays(Scop *S, const ScopArrayInfo *Old,
                                 const ScopArrayInfo *New) {
  for (ScopStmt &Stmt : *S)
    for (MemoryAccess *Access : Stmt) {
      if (Access->getLatestScopArrayInfo() != Old)
        continue;

      isl::id Id = New->getBasePtrId();
      isl::map Map = Access->getAccessRelation();
      Map = Map.set_tuple_id(isl::dim::out, Id);
      Access->setAccessRelation(Map);
    }
}

void Scop::canonicalizeDynamicBasePtrs() {
  for (InvariantEquivClassTy &EqClass : InvariantEquivClasses) {
    MemoryAccessList &BasePtrAccesses = EqClass.InvariantAccesses;

    const ScopArrayInfo *CanonicalBasePtrSAI =
        findCanonicalArray(this, BasePtrAccesses);

    if (!CanonicalBasePtrSAI)
      continue;

    for (MemoryAccess *BasePtrAccess : BasePtrAccesses) {
      const ScopArrayInfo *BasePtrSAI = getScopArrayInfoOrNull(
          BasePtrAccess->getAccessInstruction(), MemoryKind::Array);
      if (!BasePtrSAI || BasePtrSAI == CanonicalBasePtrSAI ||
          !BasePtrSAI->isCompatibleWith(CanonicalBasePtrSAI))
        continue;

      // We currently do not canonicalize arrays where some accesses are
      // hoisted as invariant loads. If we would, we would need to update the
      // access function of the invariant loads as well. However, as this is
      // not a very common situation, we leave this for now to avoid further
      // complexity increases.
      if (isUsedForIndirectHoistedLoad(this, BasePtrSAI))
        continue;

      replaceBasePtrArrays(this, BasePtrSAI, CanonicalBasePtrSAI);
    }
  }
}
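// For illustration: if two statements load the same descriptor element, e.g.
// A->data, and each load serves as the base pointer of otherwise compatible
// arrays, canonicalizeDynamicBasePtrs above rewrites all accesses to use a
// single representative ScopArrayInfo, so later passes see one array instead
// of two.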
ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr,
                                              Type *ElementType,
                                              ArrayRef<const SCEV *> Sizes,
                                              MemoryKind Kind,
                                              const char *BaseName) {
  assert((BasePtr || BaseName) &&
         "BasePtr and BaseName can not be nullptr at the same time.");
  assert(!(BasePtr && BaseName) && "BaseName is redundant.");
  auto &SAI = BasePtr ? ScopArrayInfoMap[std::make_pair(BasePtr, Kind)]
                      : ScopArrayNameMap[BaseName];
  if (!SAI) {
    auto &DL = getFunction().getParent()->getDataLayout();
    SAI.reset(new ScopArrayInfo(BasePtr, ElementType, getIslCtx(), Sizes,
                                Kind, DL, this, BaseName));
    ScopArrayInfoSet.insert(SAI.get());
  } else {
    SAI->updateElementType(ElementType);
    // In case of mismatching array sizes, we bail out by setting the
    // run-time context to false.
    if (!SAI->updateSizes(Sizes))
      invalidate(DELINEARIZATION, DebugLoc());
  }
  return SAI.get();
}

ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
                                         const std::string &BaseName,
                                         const std::vector<unsigned> &Sizes) {
  auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
  std::vector<const SCEV *> SCEVSizes;

  for (auto size : Sizes)
    if (size)
      SCEVSizes.push_back(getSE()->getConstant(DimSizeType, size, false));
    else
      SCEVSizes.push_back(nullptr);

  auto *SAI = getOrCreateScopArrayInfo(nullptr, ElementType, SCEVSizes,
                                       MemoryKind::Array, BaseName.c_str());
  return SAI;
}

const ScopArrayInfo *Scop::getScopArrayInfoOrNull(Value *BasePtr,
                                                  MemoryKind Kind) {
  auto *SAI = ScopArrayInfoMap[std::make_pair(BasePtr, Kind)].get();
  return SAI;
}

const ScopArrayInfo *Scop::getScopArrayInfo(Value *BasePtr, MemoryKind Kind) {
  auto *SAI = getScopArrayInfoOrNull(BasePtr, Kind);
  assert(SAI && "No ScopArrayInfo available for this base pointer");
  return SAI;
}

std::string Scop::getContextStr() const { return getContext().to_str(); }

std::string Scop::getAssumedContextStr() const {
  assert(AssumedContext && "Assumed context not yet built");
  return AssumedContext.to_str();
}

std::string Scop::getInvalidContextStr() const {
  return InvalidContext.to_str();
}

std::string Scop::getNameStr() const {
  std::string ExitName, EntryName;
  std::tie(EntryName, ExitName) = getEntryExitStr();
  return EntryName + "---" + ExitName;
}

std::pair<std::string, std::string> Scop::getEntryExitStr() const {
  std::string ExitName, EntryName;
  raw_string_ostream ExitStr(ExitName);
  raw_string_ostream EntryStr(EntryName);

  R.getEntry()->printAsOperand(EntryStr, false);
  EntryStr.str();

  if (R.getExit()) {
    R.getExit()->printAsOperand(ExitStr, false);
    ExitStr.str();
  } else
    ExitName = "FunctionExit";

  return std::make_pair(EntryName, ExitName);
}

isl::set Scop::getContext() const { return Context; }

isl::space Scop::getParamSpace() const { return getContext().get_space(); }

isl::space Scop::getFullParamSpace() const {
  std::vector<isl::id> FortranIDs;
  FortranIDs = getFortranArrayIds(arrays());

  isl::space Space = isl::space::params_alloc(
      getIslCtx(), ParameterIds.size() + FortranIDs.size());

  unsigned PDim = 0;
  for (const SCEV *Parameter : Parameters) {
    isl::id Id = getIdForParam(Parameter);
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);
  }

  for (isl::id Id : FortranIDs)
    Space = Space.set_dim_id(isl::dim::param, PDim++, Id);

  return Space;
}

isl::set Scop::getAssumedContext() const {
  assert(AssumedContext && "Assumed context not yet built");
  return AssumedContext;
}

bool Scop::isProfitable(bool ScalarsAreUnprofitable) const {
  if (PollyProcessUnprofitable)
    return true;

  if (isEmpty())
    return false;

  unsigned OptimizableStmtsOrLoops = 0;
  for (auto &Stmt : *this) {
    if (Stmt.getNumIterators() == 0)
      continue;

    bool ContainsArrayAccs = false;
    bool ContainsScalarAccs = false;
    for (auto *MA : Stmt) {
      if (MA->isRead())
        continue;
      ContainsArrayAccs |= MA->isLatestArrayKind();
      ContainsScalarAccs |= MA->isLatestScalarKind();
    }

    if (!ScalarsAreUnprofitable || (ContainsArrayAccs && !ContainsScalarAccs))
      OptimizableStmtsOrLoops += Stmt.getNumIterators();
  }

  return OptimizableStmtsOrLoops > 1;
}
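/// Check whether the conditions under which this Scop's optimized version
/// may be executed are satisfiable at all: the assumed context must be
/// non-empty, and neither it, the domain parameter context, nor the
/// parameter context may be fully contained in the invalid context.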
bool Scop::hasFeasibleRuntimeContext() const {
  auto PositiveContext = getAssumedContext();
  auto NegativeContext = getInvalidContext();
  PositiveContext = addNonEmptyDomainConstraints(PositiveContext);
  // addNonEmptyDomainConstraints returns null if ScopStmts have a null
  // domain.
  if (!PositiveContext)
    return false;

  bool IsFeasible = !(PositiveContext.is_empty() ||
                      PositiveContext.is_subset(NegativeContext));
  if (!IsFeasible)
    return false;

  auto DomainContext = getDomains().params();
  IsFeasible = !DomainContext.is_subset(NegativeContext);
  IsFeasible &= !Context.is_subset(NegativeContext);

  return IsFeasible;
}

static std::string toString(AssumptionKind Kind) {
  switch (Kind) {
  case ALIASING:
    return "No-aliasing";
  case INBOUNDS:
    return "Inbounds";
  case WRAPPING:
    return "No-overflows";
  case UNSIGNED:
    return "Signed-unsigned";
  case COMPLEXITY:
    return "Low complexity";
  case PROFITABLE:
    return "Profitable";
  case ERRORBLOCK:
    return "No-error";
  case INFINITELOOP:
    return "Finite loop";
  case INVARIANTLOAD:
    return "Invariant load";
  case DELINEARIZATION:
    return "Delinearization";
  }
  llvm_unreachable("Unknown AssumptionKind!");
}

bool Scop::isEffectiveAssumption(isl::set Set, AssumptionSign Sign) {
  if (Sign == AS_ASSUMPTION) {
    if (Context.is_subset(Set))
      return false;

    if (AssumedContext.is_subset(Set))
      return false;
  } else {
    if (Set.is_disjoint(Context))
      return false;

    if (Set.is_subset(InvalidContext))
      return false;
  }
  return true;
}

bool Scop::trackAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                           AssumptionSign Sign, BasicBlock *BB) {
  if (PollyRemarksMinimal && !isEffectiveAssumption(Set, Sign))
    return false;

  // Never emit trivial assumptions as they only clutter the output.
  if (!PollyRemarksMinimal) {
    isl::set Univ;
    if (Sign == AS_ASSUMPTION)
      Univ = isl::set::universe(Set.get_space());

    bool IsTrivial = (Sign == AS_RESTRICTION && Set.is_empty()) ||
                     (Sign == AS_ASSUMPTION && Univ.is_equal(Set));

    if (IsTrivial)
      return false;
  }

  switch (Kind) {
  case ALIASING:
    AssumptionsAliasing++;
    break;
  case INBOUNDS:
    AssumptionsInbounds++;
    break;
  case WRAPPING:
    AssumptionsWrapping++;
    break;
  case UNSIGNED:
    AssumptionsUnsigned++;
    break;
  case COMPLEXITY:
    AssumptionsComplexity++;
    break;
  case PROFITABLE:
    AssumptionsUnprofitable++;
    break;
  case ERRORBLOCK:
    AssumptionsErrorBlock++;
    break;
  case INFINITELOOP:
    AssumptionsInfiniteLoop++;
    break;
  case INVARIANTLOAD:
    AssumptionsInvariantLoad++;
    break;
  case DELINEARIZATION:
    AssumptionsDelinearization++;
    break;
  }

  auto Suffix = Sign == AS_ASSUMPTION ? " assumption:\t" : " restriction:\t";
  std::string Msg = toString(Kind) + Suffix + Set.to_str();
  if (BB)
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc, BB)
             << Msg);
  else
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AssumpRestrict", Loc,
                                        R.getEntry())
             << Msg);
  return true;
}

void Scop::addAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                         AssumptionSign Sign, BasicBlock *BB) {
  // Simplify the assumptions/restrictions first.
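  // For example (illustrative): gisting the assumption
  // { [n] : n >= 0 and n <= 100 } against a known context { [n] : n >= 0 }
  // keeps only the new information { [n] : n <= 100 }.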
  Set = Set.gist_params(getContext());

  if (!trackAssumption(Kind, Set, Loc, Sign, BB))
    return;

  if (Sign == AS_ASSUMPTION)
    AssumedContext = AssumedContext.intersect(Set).coalesce();
  else
    InvalidContext = InvalidContext.unite(Set).coalesce();
}

void Scop::recordAssumption(AssumptionKind Kind, isl::set Set, DebugLoc Loc,
                            AssumptionSign Sign, BasicBlock *BB) {
  assert((Set.is_params() || BB) &&
         "Assumptions without a basic block must be parameter sets");
  RecordedAssumptions.push_back({Kind, Sign, Set, Loc, BB});
}

void Scop::addRecordedAssumptions() {
  while (!RecordedAssumptions.empty()) {
    Assumption AS = RecordedAssumptions.pop_back_val();

    if (!AS.BB) {
      addAssumption(AS.Kind, AS.Set, AS.Loc, AS.Sign,
                    nullptr /* BasicBlock */);
      continue;
    }

    // If the domain was deleted the assumptions are void.
    isl_set *Dom = getDomainConditions(AS.BB).release();
    if (!Dom)
      continue;

    // If a basic block was given use its domain to simplify the assumption.
    // In case of restrictions we know they only have to hold on the domain,
    // thus we can intersect them with the domain of the block. However, for
    // assumptions the domain has to imply them, thus:
    //                     _              _____
    //   Dom => S   <==>   A v B   <==>   A - B
    //
    // To avoid the complement we will register A - B as a restriction not an
    // assumption.
    isl_set *S = AS.Set.copy();
    if (AS.Sign == AS_RESTRICTION)
      S = isl_set_params(isl_set_intersect(S, Dom));
    else /* (AS.Sign == AS_ASSUMPTION) */
      S = isl_set_params(isl_set_subtract(Dom, S));

    addAssumption(AS.Kind, isl::manage(S), AS.Loc, AS_RESTRICTION, AS.BB);
  }
}

void Scop::invalidate(AssumptionKind Kind, DebugLoc Loc, BasicBlock *BB) {
  LLVM_DEBUG(dbgs() << "Invalidate SCoP because of reason " << Kind << "\n");
  addAssumption(Kind, isl::set::empty(getParamSpace()), Loc, AS_ASSUMPTION,
                BB);
}

isl::set Scop::getInvalidContext() const { return InvalidContext; }

void Scop::printContext(raw_ostream &OS) const {
  OS << "Context:\n";
  OS.indent(4) << Context << "\n";

  OS.indent(4) << "Assumed Context:\n";
  OS.indent(4) << AssumedContext << "\n";

  OS.indent(4) << "Invalid Context:\n";
  OS.indent(4) << InvalidContext << "\n";

  unsigned Dim = 0;
  for (const SCEV *Parameter : Parameters)
    OS.indent(4) << "p" << Dim++ << ": " << *Parameter << "\n";
}

void Scop::printAliasAssumptions(raw_ostream &OS) const {
  int noOfGroups = 0;
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    if (Pair.second.size() == 0)
      noOfGroups += 1;
    else
      noOfGroups += Pair.second.size();
  }

  OS.indent(4) << "Alias Groups (" << noOfGroups << "):\n";
  if (MinMaxAliasGroups.empty()) {
    OS.indent(8) << "n/a\n";
    return;
  }
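  // Each group is printed as a bracketed list of <min, max> access ranges,
  // roughly (illustrative):
  //   [[ <MemRef_A[0], MemRef_A[n]> <MemRef_B[0], MemRef_B[n]> ]]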
  for (const MinMaxVectorPairTy &Pair : MinMaxAliasGroups) {
    // If the group has no read-only accesses print the write accesses.
    if (Pair.second.empty()) {
      OS.indent(8) << "[[";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }

    for (const MinMaxAccessTy &MMAReadOnly : Pair.second) {
      OS.indent(8) << "[[";
      OS << " <" << MMAReadOnly.first << ", " << MMAReadOnly.second << ">";
      for (const MinMaxAccessTy &MMANonReadOnly : Pair.first) {
        OS << " <" << MMANonReadOnly.first << ", " << MMANonReadOnly.second
           << ">";
      }
      OS << " ]]\n";
    }
  }
}

void Scop::printStatements(raw_ostream &OS, bool PrintInstructions) const {
  OS << "Statements {\n";

  for (const ScopStmt &Stmt : *this) {
    OS.indent(4);
    Stmt.print(OS, PrintInstructions);
  }

  OS.indent(4) << "}\n";
}

void Scop::printArrayInfo(raw_ostream &OS) const {
  OS << "Arrays {\n";

  for (auto &Array : arrays())
    Array->print(OS);

  OS.indent(4) << "}\n";

  OS.indent(4) << "Arrays (Bounds as pw_affs) {\n";

  for (auto &Array : arrays())
    Array->print(OS, /* SizeAsPwAff */ true);

  OS.indent(4) << "}\n";
}

void Scop::print(raw_ostream &OS, bool PrintInstructions) const {
  OS.indent(4) << "Function: " << getFunction().getName() << "\n";
  OS.indent(4) << "Region: " << getNameStr() << "\n";
  OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n";
  OS.indent(4) << "Invariant Accesses: {\n";
  for (const auto &IAClass : InvariantEquivClasses) {
    const auto &MAs = IAClass.InvariantAccesses;
    if (MAs.empty()) {
      OS.indent(12) << "Class Pointer: " << *IAClass.IdentifyingPointer
                    << "\n";
    } else {
      MAs.front()->print(OS);
      OS.indent(12) << "Execution Context: " << IAClass.ExecutionContext
                    << "\n";
    }
  }
  OS.indent(4) << "}\n";
  printContext(OS.indent(4));
  printArrayInfo(OS.indent(4));
  printAliasAssumptions(OS);
  printStatements(OS.indent(4), PrintInstructions);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void Scop::dump() const { print(dbgs(), true); }
#endif

isl::ctx Scop::getIslCtx() const { return IslCtx.get(); }
__isl_give PWACtx Scop::getPwAff(const SCEV *E, BasicBlock *BB,
                                 bool NonNegative) {
  // First try to use the SCEVAffinator to generate a piecewise defined
  // affine function from @p E in the context of @p BB. If that task becomes
  // too complex the affinator might return a nullptr. In such a case we
  // invalidate the SCoP and return a dummy value. This way we do not need
  // to add error handling code to all users of this function.
  auto PWAC = Affinator.getPwAff(E, BB);
  if (PWAC.first) {
    // TODO: We could use a heuristic and either use:
    //         SCEVAffinator::takeNonNegativeAssumption
    //       or
    //         SCEVAffinator::interpretAsUnsigned
    //       to deal with unsigned or "NonNegative" SCEVs.
    if (NonNegative)
      Affinator.takeNonNegativeAssumption(PWAC);
    return PWAC;
  }

  auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc();
  invalidate(COMPLEXITY, DL, BB);
  return Affinator.getPwAff(SE->getZero(E->getType()), BB);
}

isl::union_set Scop::getDomains() const {
  isl_space *EmptySpace = isl_space_params_alloc(getIslCtx().get(), 0);
  isl_union_set *Domain = isl_union_set_empty(EmptySpace);

  for (const ScopStmt &Stmt : *this)
    Domain = isl_union_set_add_set(Domain, Stmt.getDomain().release());

  return isl::manage(Domain);
}

isl::pw_aff Scop::getPwAffOnly(const SCEV *E, BasicBlock *BB) {
  PWACtx PWAC = getPwAff(E, BB);
  return PWAC.first;
}

isl::union_map
Scop::getAccessesOfType(std::function<bool(MemoryAccess &)> Predicate) {
  isl::union_map Accesses = isl::union_map::empty(getParamSpace());

  for (ScopStmt &Stmt : *this) {
    for (MemoryAccess *MA : Stmt) {
      if (!Predicate(*MA))
        continue;

      isl::set Domain = Stmt.getDomain();
      isl::map AccessDomain = MA->getAccessRelation();
      AccessDomain = AccessDomain.intersect_domain(Domain);
      Accesses = Accesses.add_map(AccessDomain);
    }
  }

  return Accesses.coalesce();
}

isl::union_map Scop::getMustWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMustWrite(); });
}

isl::union_map Scop::getMayWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isMayWrite(); });
}

isl::union_map Scop::getWrites() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isWrite(); });
}

isl::union_map Scop::getReads() {
  return getAccessesOfType([](MemoryAccess &MA) { return MA.isRead(); });
}

isl::union_map Scop::getAccesses() {
  return getAccessesOfType([](MemoryAccess &MA) { return true; });
}

isl::union_map Scop::getAccesses(ScopArrayInfo *Array) {
  return getAccessesOfType(
      [Array](MemoryAccess &MA) { return MA.getScopArrayInfo() == Array; });
}

// Check whether @p Node is an extension node.
//
// @return isl_bool_error if @p Node is an extension node (to abort the
//         traversal), isl_bool_true otherwise.
isl_bool isNotExtNode(__isl_keep isl_schedule_node *Node, void *User) {
  if (isl_schedule_node_get_type(Node) == isl_schedule_node_extension)
    return isl_bool_error;
  else
    return isl_bool_true;
}

bool Scop::containsExtensionNode(isl::schedule Schedule) {
  return isl_schedule_foreach_schedule_node_top_down(
             Schedule.get(), isNotExtNode, nullptr) == isl_stat_error;
}

isl::union_map Scop::getSchedule() const {
  auto Tree = getScheduleTree();
  if (containsExtensionNode(Tree))
    return nullptr;

  return Tree.get_map();
}

isl::schedule Scop::getScheduleTree() const {
  return Schedule.intersect_domain(getDomains());
}

void Scop::setSchedule(isl::union_map NewSchedule) {
  auto S = isl::schedule::from_domain(getDomains());
  Schedule = S.insert_partial_schedule(
      isl::multi_union_pw_aff::from_union_map(NewSchedule));
  ScheduleModified = true;
}

void Scop::setScheduleTree(isl::schedule NewSchedule) {
  Schedule = NewSchedule;
  ScheduleModified = true;
}

bool Scop::restrictDomains(isl::union_set Domain) {
  bool Changed = false;
  for (ScopStmt &Stmt : *this) {
    isl::union_set StmtDomain = isl::union_set(Stmt.getDomain());
    isl::union_set NewStmtDomain = StmtDomain.intersect(Domain);

    if (StmtDomain.is_subset(NewStmtDomain))
      continue;

    Changed = true;

    NewStmtDomain = NewStmtDomain.coalesce();

    if (NewStmtDomain.is_empty())
      Stmt.restrictDomain(isl::set::empty(Stmt.getDomainSpace()));
    else
      Stmt.restrictDomain(isl::set(NewStmtDomain));
  }
  return Changed;
}

ScalarEvolution *Scop::getSE() const { return SE; }
// Create an isl_multi_union_aff that defines an identity mapping from the
// elements of USet to their N-th dimension.
//
// # Example:
//
//   Domain: { A[i,j]; B[i,j,k] }
//   N: 1
//
// Resulting Mapping: { A[i,j] -> [(j)]; B[i,j,k] -> [(j)] }
//
// @param USet   A union set describing the elements for which to generate a
//               mapping.
// @param N      The dimension to map to.
// @returns      A mapping from USet to its N-th dimension.
static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
  assert(N >= 0);
  assert(USet);
  assert(!USet.is_empty());

  auto Result = isl::union_pw_multi_aff::empty(USet.get_space());

  auto Lambda = [&Result, N](isl::set S) -> isl::stat {
    int Dim = S.dim(isl::dim::set);
    auto PMA = isl::pw_multi_aff::project_out_map(S.get_space(),
                                                  isl::dim::set, N, Dim - N);
    if (N > 1)
      PMA = PMA.drop_dims(isl::dim::out, 0, N - 1);

    Result = Result.add_pw_multi_aff(PMA);
    return isl::stat::ok;
  };

  isl::stat Res = USet.foreach_set(Lambda);
  (void)Res;

  assert(Res == isl::stat::ok);

  return isl::multi_union_pw_aff(isl::union_pw_multi_aff(Result));
}

void Scop::addScopStmt(BasicBlock *BB, StringRef Name, Loop *SurroundingLoop,
                       std::vector<Instruction *> Instructions) {
  assert(BB && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *BB, Name, SurroundingLoop, Instructions);
  auto *Stmt = &Stmts.back();
  StmtMap[BB].push_back(Stmt);
  for (Instruction *Inst : Instructions) {
    assert(!InstStmtMap.count(Inst) &&
           "Unexpected statement corresponding to the instruction.");
    InstStmtMap[Inst] = Stmt;
  }
}

void Scop::addScopStmt(Region *R, StringRef Name, Loop *SurroundingLoop,
                       std::vector<Instruction *> Instructions) {
  assert(R && "Unexpected nullptr!");
  Stmts.emplace_back(*this, *R, Name, SurroundingLoop, Instructions);
  auto *Stmt = &Stmts.back();

  for (Instruction *Inst : Instructions) {
    assert(!InstStmtMap.count(Inst) &&
           "Unexpected statement corresponding to the instruction.");
    InstStmtMap[Inst] = Stmt;
  }

  for (BasicBlock *BB : R->blocks()) {
    StmtMap[BB].push_back(Stmt);
    if (BB == R->getEntry())
      continue;
    for (Instruction &Inst : *BB) {
      assert(!InstStmtMap.count(&Inst) &&
             "Unexpected statement corresponding to the instruction.");
      InstStmtMap[&Inst] = Stmt;
    }
  }
}

ScopStmt *Scop::addScopStmt(isl::map SourceRel, isl::map TargetRel,
                            isl::set Domain) {
#ifndef NDEBUG
  isl::set SourceDomain = SourceRel.domain();
  isl::set TargetDomain = TargetRel.domain();
  assert(Domain.is_subset(TargetDomain) &&
         "Target access not defined for complete statement domain");
  assert(Domain.is_subset(SourceDomain) &&
         "Source access not defined for complete statement domain");
#endif
  Stmts.emplace_back(*this, SourceRel, TargetRel, Domain);
  CopyStmtsNum++;
  return &(Stmts.back());
}

void Scop::buildSchedule(LoopInfo &LI) {
  Loop *L = getLoopSurroundingScop(*this, LI);
  LoopStackTy LoopStack({LoopStackElementTy(L, nullptr, 0)});
  buildSchedule(getRegion().getNode(), LoopStack, LI);
  assert(LoopStack.size() == 1 && LoopStack.back().L == L);
  Schedule = LoopStack[0].Schedule;
}
/// To generate a schedule for the elements in a Region we traverse the
/// Region in reverse-post-order and add the contained RegionNodes in
/// traversal order to the schedule of the loop that is currently at the top
/// of the LoopStack. For loop-free codes, this results in a correct
/// sequential ordering.
///
/// Example:
///
///          bb1(0)
///         /     \.
///     bb2(1)   bb3(2)
///        \    /  \.
///         bb4(3)  bb5(4)
///            \   /
///            bb6(5)
///
/// Including loops requires additional processing. Whenever a loop header is
/// encountered, the corresponding loop is added to the @p LoopStack.
/// Starting from an empty schedule, we first process all RegionNodes that
/// are within this loop and complete the sequential schedule at this
/// loop-level before processing any other nodes. To implement this
/// loop-nodes-first-processing, the reverse post-order traversal is
/// insufficient. Hence, we additionally check if the traversal yields
/// sub-regions or blocks that are outside the last loop on the @p LoopStack.
/// These region-nodes are then queued and only traversed after all nodes
/// within the current loop have been processed.
void Scop::buildSchedule(Region *R, LoopStackTy &LoopStack, LoopInfo &LI) {
  Loop *OuterScopLoop = getLoopSurroundingScop(*this, LI);

  ReversePostOrderTraversal<Region *> RTraversal(R);
  std::deque<RegionNode *> WorkList(RTraversal.begin(), RTraversal.end());
  std::deque<RegionNode *> DelayList;
  bool LastRNWaiting = false;

  // Iterate over the region @p R in reverse post-order but queue
  // sub-regions/blocks iff they are not part of the last encountered but not
  // completely traversed loop. The variable LastRNWaiting is a flag to
  // indicate that we queued the last sub-region/block from the reverse
  // post-order iterator. If it is set we have to explore the next
  // sub-region/block from the iterator (if any) to guarantee progress. If it
  // is not set we first try the next queued sub-regions/blocks.
  while (!WorkList.empty() || !DelayList.empty()) {
    RegionNode *RN;

    if ((LastRNWaiting && !WorkList.empty()) || DelayList.empty()) {
      RN = WorkList.front();
      WorkList.pop_front();
      LastRNWaiting = false;
    } else {
      RN = DelayList.front();
      DelayList.pop_front();
    }

    Loop *L = getRegionNodeLoop(RN, LI);
    if (!contains(L))
      L = OuterScopLoop;

    Loop *LastLoop = LoopStack.back().L;
    if (LastLoop != L) {
      if (LastLoop && !LastLoop->contains(L)) {
        LastRNWaiting = true;
        DelayList.push_back(RN);
        continue;
      }
      LoopStack.push_back({L, nullptr, 0});
    }
    buildSchedule(RN, LoopStack, LI);
  }
}

void Scop::buildSchedule(RegionNode *RN, LoopStackTy &LoopStack,
                         LoopInfo &LI) {
  if (RN->isSubRegion()) {
    auto *LocalRegion = RN->getNodeAs<Region>();
    if (!isNonAffineSubRegion(LocalRegion)) {
      buildSchedule(LocalRegion, LoopStack, LI);
      return;
    }
  }

  assert(LoopStack.rbegin() != LoopStack.rend());
  auto LoopData = LoopStack.rbegin();
  LoopData->NumBlocksProcessed += getNumBlocksInRegionNode(RN);

  for (auto *Stmt : getStmtListFor(RN)) {
    isl::union_set UDomain{Stmt->getDomain()};
    auto StmtSchedule = isl::schedule::from_domain(UDomain);
    LoopData->Schedule = combineInSequence(LoopData->Schedule, StmtSchedule);
  }

  // Check if we just processed the last node in this loop. If we did,
  // finalize the loop by:
  //
  //   - adding new schedule dimensions
  //   - folding the resulting schedule into the parent loop schedule
  //   - dropping the loop schedule from the LoopStack.
  //
  // Then continue to check surrounding loops, which might also have been
  // completed by this node.
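  // For example (illustrative): after the body of a 2-deep loop nest
  //   for (i ...) for (j ...) Stmt(i, j);
  // has been fully processed, the inner loop is finalized by inserting the
  // partial schedule for its dimension (via mapToDimension), folding it into
  // the schedule of the i-loop, and popping it from the LoopStack; the
  // i-loop may then be finalized the same way.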
  size_t Dimension = LoopStack.size();
  while (LoopData->L &&
         LoopData->NumBlocksProcessed == getNumBlocksInLoop(LoopData->L)) {
    isl::schedule Schedule = LoopData->Schedule;
    auto NumBlocksProcessed = LoopData->NumBlocksProcessed;

    assert(std::next(LoopData) != LoopStack.rend());
    ++LoopData;
    --Dimension;

    if (Schedule) {
      isl::union_set Domain = Schedule.get_domain();
      isl::multi_union_pw_aff MUPA = mapToDimension(Domain, Dimension);
      Schedule = Schedule.insert_partial_schedule(MUPA);
      LoopData->Schedule = combineInSequence(LoopData->Schedule, Schedule);
    }

    LoopData->NumBlocksProcessed += NumBlocksProcessed;
  }
  // Now pop all loops processed above from the LoopStack.
  LoopStack.erase(LoopStack.begin() + Dimension, LoopStack.end());
}

ArrayRef<ScopStmt *> Scop::getStmtListFor(BasicBlock *BB) const {
  auto StmtMapIt = StmtMap.find(BB);
  if (StmtMapIt == StmtMap.end())
    return {};
  return StmtMapIt->second;
}

ScopStmt *Scop::getIncomingStmtFor(const Use &U) const {
  auto *PHI = cast<PHINode>(U.getUser());
  BasicBlock *IncomingBB = PHI->getIncomingBlock(U);

  // If the value is non-synthesizable from the incoming block, use the
  // statement that contains it as user statement.
  if (auto *IncomingInst = dyn_cast<Instruction>(U.get())) {
    if (IncomingInst->getParent() == IncomingBB) {
      if (ScopStmt *IncomingStmt = getStmtFor(IncomingInst))
        return IncomingStmt;
    }
  }

  // Otherwise, use the epilogue/last statement.
  return getLastStmtFor(IncomingBB);
}

ScopStmt *Scop::getLastStmtFor(BasicBlock *BB) const {
  ArrayRef<ScopStmt *> StmtList = getStmtListFor(BB);
  if (!StmtList.empty())
    return StmtList.back();
  return nullptr;
}

ArrayRef<ScopStmt *> Scop::getStmtListFor(RegionNode *RN) const {
  if (RN->isSubRegion())
    return getStmtListFor(RN->getNodeAs<Region>());
  return getStmtListFor(RN->getNodeAs<BasicBlock>());
}

ArrayRef<ScopStmt *> Scop::getStmtListFor(Region *R) const {
  return getStmtListFor(R->getEntry());
}

int Scop::getRelativeLoopDepth(const Loop *L) const {
  if (!L || !R.contains(L))
    return -1;
  // outermostLoopInRegion always returns nullptr for top level regions.
  if (R.isTopLevelRegion()) {
    // LoopInfo's depths start at 1, we start at 0.
    return L->getLoopDepth() - 1;
  } else {
    Loop *OuterLoop = R.outermostLoopInRegion(const_cast<Loop *>(L));
    assert(OuterLoop);
    return L->getLoopDepth() - OuterLoop->getLoopDepth();
  }
}

ScopArrayInfo *Scop::getArrayInfoByName(const std::string BaseName) {
  for (auto &SAI : arrays()) {
    if (SAI->getName() == BaseName)
      return SAI;
  }
  return nullptr;
}

void Scop::addAccessData(MemoryAccess *Access) {
  const ScopArrayInfo *SAI = Access->getOriginalScopArrayInfo();
  assert(SAI && "can only use after access relations have been constructed");

  if (Access->isOriginalValueKind() && Access->isRead())
    ValueUseAccs[SAI].push_back(Access);
  else if (Access->isOriginalAnyPHIKind() && Access->isWrite())
    PHIIncomingAccs[SAI].push_back(Access);
}

void Scop::removeAccessData(MemoryAccess *Access) {
  if (Access->isOriginalValueKind() && Access->isWrite()) {
    ValueDefAccs.erase(Access->getAccessValue());
  } else if (Access->isOriginalValueKind() && Access->isRead()) {
    auto &Uses = ValueUseAccs[Access->getScopArrayInfo()];
    auto NewEnd = std::remove(Uses.begin(), Uses.end(), Access);
    Uses.erase(NewEnd, Uses.end());
  } else if (Access->isOriginalPHIKind() && Access->isRead()) {
    PHINode *PHI = cast<PHINode>(Access->getAccessInstruction());
    PHIReadAccs.erase(PHI);
  } else if (Access->isOriginalAnyPHIKind() && Access->isWrite()) {
    auto &Incomings = PHIIncomingAccs[Access->getScopArrayInfo()];
    auto NewEnd = std::remove(Incomings.begin(), Incomings.end(), Access);
    Incomings.erase(NewEnd, Incomings.end());
  }
}
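// The following lookups are the read side of the bookkeeping maintained by
// addAccessData/removeAccessData above: ValueDefAccs/ValueUseAccs map a
// MemoryKind::Value array to its defining write and its reads, while
// PHIReadAccs/PHIIncomingAccs map a MemoryKind::PHI array to its read and
// its incoming writes.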
MemoryAccess *Scop::getValueDef(const ScopArrayInfo *SAI) const {
  assert(SAI->isValueKind());

  Instruction *Val = dyn_cast<Instruction>(SAI->getBasePtr());
  if (!Val)
    return nullptr;

  return ValueDefAccs.lookup(Val);
}

ArrayRef<MemoryAccess *> Scop::getValueUses(const ScopArrayInfo *SAI) const {
  assert(SAI->isValueKind());
  auto It = ValueUseAccs.find(SAI);
  if (It == ValueUseAccs.end())
    return {};
  return It->second;
}

MemoryAccess *Scop::getPHIRead(const ScopArrayInfo *SAI) const {
  assert(SAI->isPHIKind() || SAI->isExitPHIKind());

  if (SAI->isExitPHIKind())
    return nullptr;

  PHINode *PHI = cast<PHINode>(SAI->getBasePtr());
  return PHIReadAccs.lookup(PHI);
}

ArrayRef<MemoryAccess *>
Scop::getPHIIncomings(const ScopArrayInfo *SAI) const {
  assert(SAI->isPHIKind() || SAI->isExitPHIKind());
  auto It = PHIIncomingAccs.find(SAI);
  if (It == PHIIncomingAccs.end())
    return {};
  return It->second;
}

bool Scop::isEscaping(Instruction *Inst) {
  assert(contains(Inst) && "The concept of escaping only makes sense for "
                           "values defined inside the SCoP");

  for (Use &Use : Inst->uses()) {
    BasicBlock *UserBB = getUseBlock(Use);
    if (!contains(UserBB))
      return true;

    // When the SCoP region exit needs to be simplified, PHIs in the region
    // exit move to a new basic block such that their incoming blocks are not
    // in the SCoP anymore.
    if (hasSingleExitEdge() && isa<PHINode>(Use.getUser()) &&
        isExit(cast<PHINode>(Use.getUser())->getParent()))
      return true;
  }
  return false;
}

Scop::ScopStatistics Scop::getStatistics() const {
  ScopStatistics Result;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  auto LoopStat = ScopDetection::countBeneficialLoops(&R, *SE, *getLI(), 0);

  int NumTotalLoops = LoopStat.NumLoops;
  Result.NumBoxedLoops = getBoxedLoops().size();
  Result.NumAffineLoops = NumTotalLoops - Result.NumBoxedLoops;

  for (const ScopStmt &Stmt : *this) {
    isl::set Domain = Stmt.getDomain().intersect_params(getContext());
    bool IsInLoop = Stmt.getNumIterators() >= 1;
    for (MemoryAccess *MA : Stmt) {
      if (!MA->isWrite())
        continue;

      if (MA->isLatestValueKind()) {
        Result.NumValueWrites += 1;
        if (IsInLoop)
          Result.NumValueWritesInLoops += 1;
      }

      if (MA->isLatestAnyPHIKind()) {
        Result.NumPHIWrites += 1;
        if (IsInLoop)
          Result.NumPHIWritesInLoops += 1;
      }

      isl::set AccSet =
          MA->getAccessRelation().intersect_domain(Domain).range();
      if (AccSet.is_singleton()) {
        Result.NumSingletonWrites += 1;
        if (IsInLoop)
          Result.NumSingletonWritesInLoops += 1;
      }
    }
  }
#endif
  return Result;
}

raw_ostream &polly::operator<<(raw_ostream &OS, const Scop &scop) {
  scop.print(OS, PollyPrintInstructions);
  return OS;
}

//===----------------------------------------------------------------------===//

void ScopInfoRegionPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.setPreservesAll();
}
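/// Fold the loop statistics of a single Scop into the pass-global STATISTIC
/// counters declared at the top of this file.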
void updateLoopCountStatistic(ScopDetection::LoopStats Stats,
                              Scop::ScopStatistics ScopStats) {
  assert(Stats.NumLoops == ScopStats.NumAffineLoops + ScopStats.NumBoxedLoops);

  NumScops++;
  NumLoopsInScop += Stats.NumLoops;
  MaxNumLoopsInScop =
      std::max(MaxNumLoopsInScop.getValue(), (unsigned)Stats.NumLoops);

  if (Stats.MaxDepth == 0)
    NumScopsDepthZero++;
  else if (Stats.MaxDepth == 1)
    NumScopsDepthOne++;
  else if (Stats.MaxDepth == 2)
    NumScopsDepthTwo++;
  else if (Stats.MaxDepth == 3)
    NumScopsDepthThree++;
  else if (Stats.MaxDepth == 4)
    NumScopsDepthFour++;
  else if (Stats.MaxDepth == 5)
    NumScopsDepthFive++;
  else
    NumScopsDepthLarger++;

  NumAffineLoops += ScopStats.NumAffineLoops;
  NumBoxedLoops += ScopStats.NumBoxedLoops;

  NumValueWrites += ScopStats.NumValueWrites;
  NumValueWritesInLoops += ScopStats.NumValueWritesInLoops;
  NumPHIWrites += ScopStats.NumPHIWrites;
  NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops;
  NumSingletonWrites += ScopStats.NumSingletonWrites;
  NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops;
}

bool ScopInfoRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();

  if (!SD.isMaxRegionInScop(*R))
    return false;

  Function *F = R->getEntry()->getParent();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F->getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
  S = SB.getScop(); // take ownership of scop object

#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
  if (S) {
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
  }
#endif

  return false;
}

void ScopInfoRegionPass::print(raw_ostream &OS, const Module *) const {
  if (S)
    S->print(OS, PollyPrintInstructions);
  else
    OS << "Invalid Scop!\n";
}

char ScopInfoRegionPass::ID = 0;

Pass *polly::createScopInfoRegionPassPass() { return new ScopInfoRegionPass(); }

INITIALIZE_PASS_BEGIN(ScopInfoRegionPass, "polly-scops",
                      "Polly - Create polyhedral description of Scops", false,
                      false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(ScopInfoRegionPass, "polly-scops",
                    "Polly - Create polyhedral description of Scops", false,
                    false)

//===----------------------------------------------------------------------===//

ScopInfo::ScopInfo(const DataLayout &DL, ScopDetection &SD,
                   ScalarEvolution &SE, LoopInfo &LI, AliasAnalysis &AA,
                   DominatorTree &DT, AssumptionCache &AC,
                   OptimizationRemarkEmitter &ORE)
    : DL(DL), SD(SD), SE(SE), LI(LI), AA(AA), DT(DT), AC(AC), ORE(ORE) {
  recompute();
}
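/// Rebuild the Region-to-Scop map from scratch by running ScopBuilder on
/// every maximal SCoP region reported by ScopDetection.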
void ScopInfo::recompute() {
  RegionToScopMap.clear();
  /// Create polyhedral descriptions of scops for all the valid regions of a
  /// function.
  for (auto &It : SD) {
    Region *R = const_cast<Region *>(It);
    if (!SD.isMaxRegionInScop(*R))
      continue;

    ScopBuilder SB(R, AC, AA, DL, DT, LI, SD, SE, ORE);
    std::unique_ptr<Scop> S = SB.getScop();
    if (!S)
      continue;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_STATS)
    ScopDetection::LoopStats Stats =
        ScopDetection::countBeneficialLoops(&S->getRegion(), SE, LI, 0);
    updateLoopCountStatistic(Stats, S->getStatistics());
#endif
    bool Inserted = RegionToScopMap.insert({R, std::move(S)}).second;
    assert(Inserted && "Building Scop for the same region twice!");
    (void)Inserted;
  }
}

bool ScopInfo::invalidate(Function &F, const PreservedAnalyses &PA,
                          FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether the analysis has been preserved, all analyses on functions
  // have been preserved, or anything we're holding references to is being
  // invalidated.
  auto PAC = PA.getChecker<ScopInfoAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<ScopAnalysis>(F, PA) ||
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA) ||
         Inv.invalidate<AAManager>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA);
}

AnalysisKey ScopInfoAnalysis::Key;

ScopInfoAnalysis::Result ScopInfoAnalysis::run(Function &F,
                                               FunctionAnalysisManager &FAM) {
  auto &SD = FAM.getResult<ScopAnalysis>(F);
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
  auto &DL = F.getParent()->getDataLayout();
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  return {DL, SD, SE, LI, AA, DT, AC, ORE};
}

PreservedAnalyses ScopInfoPrinterPass::run(Function &F,
                                           FunctionAnalysisManager &FAM) {
  auto &SI = FAM.getResult<ScopInfoAnalysis>(F);
  // Since the legacy PM processes Scops bottom-up, we print them in reverse
  // order here to keep the output consistent.
  for (auto &It : reverse(SI)) {
    if (It.second)
      It.second->print(Stream, PollyPrintInstructions);
    else
      Stream << "Invalid Scop!\n";
  }
  return PreservedAnalyses::all();
}

void ScopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addRequired<RegionInfoPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<ScopDetectionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
  AU.setPreservesAll();
}

bool ScopInfoWrapperPass::runOnFunction(Function &F) {
  auto &SD = getAnalysis<ScopDetectionWrapperPass>().getSD();
  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  auto const &DL = F.getParent()->getDataLayout();
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();

  Result.reset(new ScopInfo{DL, SD, SE, LI, AA, DT, AC, ORE});
  return false;
}

void ScopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
  for (auto &It : *Result) {
    if (It.second)
      It.second->print(OS, PollyPrintInstructions);
    else
      OS << "Invalid Scop!\n";
  }
}

char ScopInfoWrapperPass::ID = 0;

Pass *polly::createScopInfoWrapperPassPass() {
  return new ScopInfoWrapperPass();
}

INITIALIZE_PASS_BEGIN(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false);
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass);
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(ScopDetectionWrapperPass);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_END(
    ScopInfoWrapperPass, "polly-function-scops",
    "Polly - Create polyhedral description of all Scops of a function", false,
    false)