//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Move instructions between statements. // //===----------------------------------------------------------------------===// #include "polly/ForwardOpTree.h" #include "polly/Options.h" #include "polly/ScopBuilder.h" #include "polly/ScopInfo.h" #include "polly/ScopPass.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ISLOStream.h" #include "polly/Support/ISLTools.h" #include "polly/Support/VirtualInstruction.h" #include "polly/ZoneAlgo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "isl/ctx.h" #include "isl/isl-noexceptions.h" #include #include #define DEBUG_TYPE "polly-optree" using namespace llvm; using namespace polly; static cl::opt AnalyzeKnown("polly-optree-analyze-known", cl::desc("Analyze array contents for load forwarding"), cl::cat(PollyCategory), cl::init(true), cl::Hidden); static cl::opt NormalizePHIs("polly-optree-normalize-phi", cl::desc("Replace PHIs by their incoming values"), cl::cat(PollyCategory), cl::init(false), cl::Hidden); static cl::opt MaxOps("polly-optree-max-ops", cl::desc("Maximum number of ISL operations to invest for known " "analysis; 0=no limit"), cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); STATISTIC(KnownOutOfQuota, "Analyses aborted because max_operations was reached"); STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); STATISTIC(TotalKnownLoadsForwarded, "Number of forwarded loads because their value was known"); STATISTIC(TotalReloads, "Number of reloaded values"); STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); STATISTIC(TotalModifiedStmts, "Number of statements with at least one forwarded tree"); STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); STATISTIC(NumValueWritesInLoops, "Number of scalar value writes nested in affine loops after OpTree"); STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); STATISTIC(NumPHIWritesInLoops, "Number of scalar phi writes nested in affine loops after OpTree"); STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); STATISTIC(NumSingletonWritesInLoops, "Number of singleton writes nested in affine loops after OpTree"); namespace { /// The state of whether an operand tree was/can be forwarded. /// /// The items apply to an instructions and its operand tree with the instruction /// as the root element. If the value in question is not an instruction in the /// SCoP, it can be a leaf of an instruction's operand tree. enum ForwardingDecision { /// The root instruction or value cannot be forwarded at all. FD_CannotForward, /// The root instruction or value can be forwarded as a leaf of a larger /// operand tree. /// It does not make sense to move the value itself, it would just replace it /// by a use of itself. For instance, a constant "5" used in a statement can /// be forwarded, but it would just replace it by the same constant "5". /// However, it makes sense to move as an operand of /// /// %add = add 5, 5 /// /// where "5" is moved as part of a larger operand tree. "5" would be placed /// (disregarding for a moment that literal constants don't have a location /// and can be used anywhere) into the same statement as %add would. FD_CanForwardLeaf, /// The root instruction can be forwarded and doing so avoids a scalar /// dependency. /// /// This can be either because the operand tree can be moved to the target /// statement, or a memory access is redirected to read from a different /// location. FD_CanForwardProfitably, /// Used to indicate that a forwarding has be carried out successfully, and /// the forwarded memory access can be deleted. FD_DidForwardTree, /// Used to indicate that a forwarding has be carried out successfully, and /// the forwarded memory access is being reused. FD_DidForwardLeaf, /// A forwarding method cannot be applied to the operand tree. /// The difference to FD_CannotForward is that there might be other methods /// that can handle it. /// The conditions that make an operand tree applicable must be checked even /// with DoIt==true because a method following the one that returned /// FD_NotApplicable might have returned FD_CanForwardTree. FD_NotApplicable }; /// Implementation of operand tree forwarding for a specific SCoP. /// /// For a statement that requires a scalar value (through a value read /// MemoryAccess), see if its operand can be moved into the statement. If so, /// the MemoryAccess is removed and the all the operand tree instructions are /// moved into the statement. All original instructions are left in the source /// statements. The simplification pass can clean these up. class ForwardOpTreeImpl : ZoneAlgorithm { private: /// Scope guard to limit the number of isl operations for this pass. IslMaxOperationsGuard &MaxOpGuard; /// How many instructions have been copied to other statements. int NumInstructionsCopied = 0; /// Number of loads forwarded because their value was known. int NumKnownLoadsForwarded = 0; /// Number of values reloaded from known array elements. int NumReloads = 0; /// How many read-only accesses have been copied. int NumReadOnlyCopied = 0; /// How many operand trees have been forwarded. int NumForwardedTrees = 0; /// Number of statements with at least one forwarded operand tree. int NumModifiedStmts = 0; /// Whether we carried out at least one change to the SCoP. bool Modified = false; /// Contains the zones where array elements are known to contain a specific /// value. /// { [Element[] -> Zone[]] -> ValInst[] } /// @see computeKnown() isl::union_map Known; /// Translator for newly introduced ValInsts to already existing ValInsts such /// that new introduced load instructions can reuse the Known analysis of its /// original load. { ValInst[] -> ValInst[] } isl::union_map Translator; /// Get list of array elements that do contain the same ValInst[] at Domain[]. /// /// @param ValInst { Domain[] -> ValInst[] } /// The values for which we search for alternative locations, /// per statement instance. /// /// @return { Domain[] -> Element[] } /// For each statement instance, the array elements that contain the /// same ValInst. isl::union_map findSameContentElements(isl::union_map ValInst) { assert(!ValInst.is_single_valued().is_false()); // { Domain[] } isl::union_set Domain = ValInst.domain(); // { Domain[] -> Scatter[] } isl::union_map Schedule = getScatterFor(Domain); // { Element[] -> [Scatter[] -> ValInst[]] } isl::union_map MustKnownCurried = convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); // { [Domain[] -> ValInst[]] -> Scatter[] } isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } isl::union_map SchedValDomVal = DomValSched.range_product(ValInst.range_map()).reverse(); // { Element[] -> [Domain[] -> ValInst[]] } isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); // { Domain[] -> Element[] } isl::union_map MustKnownMap = MustKnownInst.uncurry().domain().unwrap().reverse(); simplify(MustKnownMap); return MustKnownMap; } /// Find a single array element for each statement instance, within a single /// array. /// /// @param MustKnown { Domain[] -> Element[] } /// Set of candidate array elements. /// @param Domain { Domain[] } /// The statement instance for which we need elements for. /// /// @return { Domain[] -> Element[] } /// For each statement instance, an array element out of @p MustKnown. /// All array elements must be in the same array (Polly does not yet /// support reading from different accesses using the same /// MemoryAccess). If no mapping for all of @p Domain exists, returns /// null. isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { // { Domain[] -> Element[] } isl::map Result; // MemoryAccesses can read only elements from a single array // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). // Look through all spaces until we find one that contains at least the // wanted statement instance.s for (isl::map Map : MustKnown.get_map_list()) { // Get the array this is accessing. isl::id ArrayId = Map.get_tuple_id(isl::dim::out); ScopArrayInfo *SAI = static_cast(ArrayId.get_user()); // No support for generation of indirect array accesses. if (SAI->getBasePtrOriginSAI()) continue; // Determine whether this map contains all wanted values. isl::set MapDom = Map.domain(); if (!Domain.is_subset(MapDom).is_true()) continue; // There might be multiple array elements that contain the same value, but // choose only one of them. lexmin is used because it returns a one-value // mapping, we do not care about which one. // TODO: Get the simplest access function. Result = Map.lexmin(); break; } return Result; } public: ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} /// Compute the zones of known array element contents. /// /// @return True if the computed #Known is usable. bool computeKnownValues() { isl::union_map MustKnown, KnownFromLoad, KnownFromInit; // Check that nothing strange occurs. collectCompatibleElts(); { IslQuotaScope QuotaScope = MaxOpGuard.enter(); computeCommon(); if (NormalizePHIs) computeNormalizedPHIs(); Known = computeKnown(true, true); // Preexisting ValInsts use the known content analysis of themselves. Translator = makeIdentityMap(Known.range(), false); } if (!Known || !Translator || !NormalizeMap) { assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); Known = nullptr; Translator = nullptr; NormalizeMap = nullptr; LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); return false; } KnownAnalyzed++; LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); return true; } void printStatistics(raw_ostream &OS, int Indent = 0) { OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied << '\n'; OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded << '\n'; OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied << '\n'; OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees << '\n'; OS.indent(Indent + 4) << "Statements with forwarded operand trees: " << NumModifiedStmts << '\n'; OS.indent(Indent) << "}\n"; } void printStatements(raw_ostream &OS, int Indent = 0) const { OS.indent(Indent) << "After statements {\n"; for (auto &Stmt : *S) { OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; for (auto *MA : Stmt) MA->print(OS); OS.indent(Indent + 12); Stmt.printInstructions(OS); } OS.indent(Indent) << "}\n"; } /// Create a new MemoryAccess of type read and MemoryKind::Array. /// /// @param Stmt The statement in which the access occurs. /// @param LI The instruction that does the access. /// @param AccessRelation The array element that each statement instance /// accesses. /// /// @param The newly created access. MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, isl::map AccessRelation) { isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); ScopArrayInfo *SAI = reinterpret_cast(ArrayId.get_user()); // Create a dummy SCEV access, to be replaced anyway. SmallVector Sizes; Sizes.reserve(SAI->getNumberOfDimensions()); SmallVector Subscripts; Subscripts.reserve(SAI->getNumberOfDimensions()); for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { Sizes.push_back(SAI->getDimensionSize(i)); Subscripts.push_back(nullptr); } MemoryAccess *Access = new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); S->addAccessFunction(Access); Stmt->addAccess(Access, true); Access->setNewAccessRelation(AccessRelation); return Access; } /// Forward a load by reading from an array element that contains the same /// value. Typically the location it was loaded from. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param Inst The (possibly speculatable) instruction to forward. /// @param UseStmt The statement that uses @p Inst. /// @param UseLoop The loop @p Inst is used in. /// @param DefStmt The statement @p Inst is defined in. /// @param DefLoop The loop which contains @p Inst. /// @param DoIt If false, only determine whether an operand tree can be /// forwarded. If true, carry out the forwarding. Do not /// use DoIt==true if an operand tree is not known to be /// forwardable. /// /// @return FD_NotApplicable if @p Inst cannot be forwarded by creating a new /// load. /// FD_CannotForward if the pointer operand cannot be forwarded. /// FD_CanForwardProfitably if @p Inst is forwardable. /// FD_DidForwardTree if @p DoIt was true. ForwardingDecision forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, ScopStmt *UseStmt, Loop *UseLoop, ScopStmt *DefStmt, Loop *DefLoop, bool DoIt) { // Cannot do anything without successful known analysis. if (Known.is_null() || Translator.is_null() || MaxOpGuard.hasQuotaExceeded()) return FD_NotApplicable; LoadInst *LI = dyn_cast(Inst); if (!LI) return FD_NotApplicable; // If the load is already in the statement, no forwarding is necessary. // However, it might happen that the LoadInst is already present in the // statement's instruction list. In that case we do as follows: // - For the evaluation (DoIt==false), we can trivially forward it as it is // benefit of forwarding an already present instruction. // - For the execution (DoIt==true), prepend the instruction (to make it // available to all instructions following in the instruction list), but // do not add another MemoryAccess. MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); if (Access && !DoIt) return FD_CanForwardProfitably; ForwardingDecision OpDecision = forwardTree( TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop, DoIt); switch (OpDecision) { case FD_CannotForward: assert(!DoIt); return OpDecision; case FD_CanForwardLeaf: case FD_CanForwardProfitably: assert(!DoIt); break; case FD_DidForwardLeaf: case FD_DidForwardTree: assert(DoIt); break; default: llvm_unreachable("Shouldn't return this"); } IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); // { DomainDef[] -> ValInst[] } isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); assert(!isNormalized(ExpectedVal).is_false() && "LoadInsts are always normalized"); // { DomainUse[] -> DomainTarget[] } isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); // { DomainTarget[] -> ValInst[] } isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); isl::union_map TranslatedExpectedVal = isl::union_map(TargetExpectedVal).apply_range(Translator); // { DomainTarget[] -> Element[] } isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); if (!SameVal) return FD_NotApplicable; if (DoIt) TargetStmt->prependInstruction(LI); if (!DoIt) return FD_CanForwardProfitably; if (Access) { LLVM_DEBUG( dbgs() << " forwarded known load with preexisting MemoryAccess" << Access << "\n"); } else { Access = makeReadArrayAccess(TargetStmt, LI, SameVal); LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access << "\n"); // { ValInst[] } isl::space ValInstSpace = ExpectedVal.get_space().range(); // After adding a new load to the SCoP, also update the Known content // about it. The new load will have a known ValInst of // { [DomainTarget[] -> Value[]] } // but which -- because it is a copy of it -- has same value as the // { [DomainDef[] -> Value[]] } // that it replicates. Instead of cloning the known content of // [DomainDef[] -> Value[]] // for DomainTarget[], we add a 'translator' that maps // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] // before comparing to the known content. // TODO: 'Translator' could also be used to map PHINodes to their incoming // ValInsts. if (ValInstSpace.is_wrapping()) { // { DefDomain[] -> Value[] } isl::map ValInsts = ExpectedVal.range().unwrap(); // { DefDomain[] } isl::set DefDomain = ValInsts.domain(); // { Value[] } isl::space ValSpace = ValInstSpace.unwrap().range(); // { Value[] -> Value[] } isl::map ValToVal = isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); // { DomainDef[] -> DomainTarget[] } isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } isl::map LocalTranslator = DefToTarget.reverse().product(ValToVal); Translator = Translator.add_map(LocalTranslator); LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator << "\n"); } } LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal << "\n"); LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); assert(Access); NumKnownLoadsForwarded++; TotalKnownLoadsForwarded++; return FD_DidForwardTree; } /// Forward a scalar by redirecting the access to an array element that stores /// the same value. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param Inst The scalar to forward. /// @param UseStmt The statement that uses @p Inst. /// @param UseLoop The loop @p Inst is used in. /// @param DefStmt The statement @p Inst is defined in. /// @param DefLoop The loop which contains @p Inst. /// @param DoIt If false, only determine whether an operand tree can be /// forwarded. If true, carry out the forwarding. Do not /// use DoIt==true if an operand tree is not known to be /// forwardable. /// /// @return FD_NotApplicable if @p Inst cannot be reloaded. /// FD_CanForwardLeaf if @p Inst can be reloaded. /// FD_CanForwardProfitably if @p Inst has been reloaded. /// FD_DidForwardLeaf if @p DoIt was true. ForwardingDecision reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, ScopStmt *UseStmt, Loop *UseLoop, ScopStmt *DefStmt, Loop *DefLoop, bool DoIt) { // Cannot do anything without successful known analysis. if (Known.is_null() || Translator.is_null() || MaxOpGuard.hasQuotaExceeded()) return FD_NotApplicable; MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); if (Access && Access->isLatestArrayKind()) { if (DoIt) return FD_DidForwardLeaf; return FD_CanForwardLeaf; } // Don't spend too much time analyzing whether it can be reloaded. When // carrying-out the forwarding, we cannot bail-out in the middle of the // transformation. It also shouldn't take as long because some results are // cached. IslQuotaScope QuotaScope = MaxOpGuard.enter(!DoIt); // { DomainDef[] -> ValInst[] } isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); // { DomainUse[] -> DomainTarget[] } isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); // { DomainTarget[] -> ValInst[] } isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); isl::union_map TranslatedExpectedVal = TargetExpectedVal.apply_range(Translator); // { DomainTarget[] -> Element[] } isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); if (!SameVal) return FD_NotApplicable; if (!DoIt) return FD_CanForwardProfitably; if (!Access) Access = TargetStmt->ensureValueRead(Inst); simplify(SameVal); Access->setNewAccessRelation(SameVal); TotalReloads++; NumReloads++; return FD_DidForwardLeaf; } /// Forwards a speculatively executable instruction. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param UseInst The (possibly speculatable) instruction to forward. /// @param DefStmt The statement @p UseInst is defined in. /// @param DefLoop The loop which contains @p UseInst. /// @param DoIt If false, only determine whether an operand tree can be /// forwarded. If true, carry out the forwarding. Do not /// use DoIt==true if an operand tree is not known to be /// forwardable. /// /// @return FD_NotApplicable if @p UseInst is not speculatable. /// FD_CannotForward if one of @p UseInst's operands is not /// forwardable. /// FD_CanForwardTree if @p UseInst is forwardable. /// FD_DidForward if @p DoIt was true. ForwardingDecision forwardSpeculatable(ScopStmt *TargetStmt, Instruction *UseInst, ScopStmt *DefStmt, Loop *DefLoop, bool DoIt) { // PHIs, unless synthesizable, are not yet supported. if (isa(UseInst)) return FD_NotApplicable; // Compatible instructions must satisfy the following conditions: // 1. Idempotent (instruction will be copied, not moved; although its // original instance might be removed by simplification) // 2. Not access memory (There might be memory writes between) // 3. Not cause undefined behaviour (we might copy to a location when the // original instruction was no executed; this is currently not possible // because we do not forward PHINodes) // 4. Not leak memory if executed multiple times (i.e. malloc) // // Instruction::mayHaveSideEffects is not sufficient because it considers // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is // not sufficient because it allows memory accesses. if (mayBeMemoryDependent(*UseInst)) return FD_NotApplicable; if (DoIt) { // To ensure the right order, prepend this instruction before its // operands. This ensures that its operands are inserted before the // instruction using them. // TODO: The operand tree is not really a tree, but a DAG. We should be // able to handle DAGs without duplication. TargetStmt->prependInstruction(UseInst); NumInstructionsCopied++; TotalInstructionsCopied++; } for (Value *OpVal : UseInst->operand_values()) { ForwardingDecision OpDecision = forwardTree(TargetStmt, OpVal, DefStmt, DefLoop, DoIt); switch (OpDecision) { case FD_CannotForward: assert(!DoIt); return FD_CannotForward; case FD_CanForwardLeaf: case FD_CanForwardProfitably: assert(!DoIt); break; case FD_DidForwardLeaf: case FD_DidForwardTree: assert(DoIt); break; case FD_NotApplicable: llvm_unreachable("forwardTree should never return FD_NotApplicable"); } } if (DoIt) return FD_DidForwardTree; return FD_CanForwardProfitably; } /// Determines whether an operand tree can be forwarded or carries out a /// forwarding, depending on the @p DoIt flag. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param UseVal The value (usually an instruction) which is root of an /// operand tree. /// @param UseStmt The statement that uses @p UseVal. /// @param UseLoop The loop @p UseVal is used in. /// @param DoIt If false, only determine whether an operand tree can be /// forwarded. If true, carry out the forwarding. Do not /// use DoIt==true if an operand tree is not known to be /// forwardable. /// /// @return If DoIt==false, return whether the operand tree can be forwarded. /// If DoIt==true, return FD_DidForward. ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, ScopStmt *UseStmt, Loop *UseLoop, bool DoIt) { ScopStmt *DefStmt = nullptr; Loop *DefLoop = nullptr; // { DefDomain[] -> TargetDomain[] } isl::map DefToTarget; VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); switch (VUse.getKind()) { case VirtualUse::Constant: case VirtualUse::Block: case VirtualUse::Hoisted: // These can be used anywhere without special considerations. if (DoIt) return FD_DidForwardTree; return FD_CanForwardLeaf; case VirtualUse::Synthesizable: { // ScopExpander will take care for of generating the code at the new // location. if (DoIt) return FD_DidForwardTree; // Check if the value is synthesizable at the new location as well. This // might be possible when leaving a loop for which ScalarEvolution is // unable to derive the exit value for. // TODO: If there is a LCSSA PHI at the loop exit, use that one. // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we // do not forward past its loop header. This would require us to use a // previous loop induction variable instead the current one. We currently // do not allow forwarding PHI nodes, thus this should never occur (the // only exception where no phi is necessary being an unreachable loop // without edge from the outside). VirtualUse TargetUse = VirtualUse::create( S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); if (TargetUse.getKind() == VirtualUse::Synthesizable) return FD_CanForwardLeaf; LLVM_DEBUG( dbgs() << " Synthesizable would not be synthesizable anymore: " << *UseVal << "\n"); return FD_CannotForward; } case VirtualUse::ReadOnly: // Note that we cannot return FD_CanForwardTree here. With a operand tree // depth of 0, UseVal is the use in TargetStmt that we try to replace. // With -polly-analyze-read-only-scalars=true we would ensure the // existence of a MemoryAccess (which already exists for a leaf) and be // removed again by tryForwardTree because it's goal is to remove this // scalar MemoryAccess. It interprets FD_CanForwardTree as the permission // to do so. if (!DoIt) return FD_CanForwardLeaf; // If we model read-only scalars, we need to create a MemoryAccess for it. if (ModelReadOnlyScalars) TargetStmt->ensureValueRead(UseVal); NumReadOnlyCopied++; TotalReadOnlyCopied++; return FD_DidForwardLeaf; case VirtualUse::Intra: // Knowing that UseStmt and DefStmt are the same statement instance, just // reuse the information about UseStmt for DefStmt DefStmt = UseStmt; LLVM_FALLTHROUGH; case VirtualUse::Inter: Instruction *Inst = cast(UseVal); if (!DefStmt) { DefStmt = S->getStmtFor(Inst); if (!DefStmt) return FD_CannotForward; } DefLoop = LI->getLoopFor(Inst->getParent()); ForwardingDecision SpeculativeResult = forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop, DoIt); if (SpeculativeResult != FD_NotApplicable) return SpeculativeResult; ForwardingDecision KnownResult = forwardKnownLoad( TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); if (KnownResult != FD_NotApplicable) return KnownResult; ForwardingDecision ReloadResult = reloadKnownContent( TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop, DoIt); if (ReloadResult != FD_NotApplicable) return ReloadResult; // When no method is found to forward the operand tree, we effectively // cannot handle it. LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); return FD_CannotForward; } llvm_unreachable("Case unhandled"); } /// Try to forward an operand tree rooted in @p RA. bool tryForwardTree(MemoryAccess *RA) { assert(RA->isLatestScalarKind()); LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); ScopStmt *Stmt = RA->getStatement(); Loop *InLoop = Stmt->getSurroundingLoop(); isl::map TargetToUse; if (!Known.is_null()) { isl::space DomSpace = Stmt->getDomainSpace(); TargetToUse = isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); } ForwardingDecision Assessment = forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, false); assert(Assessment != FD_DidForwardTree && Assessment != FD_DidForwardLeaf); if (Assessment != FD_CanForwardProfitably) return false; ForwardingDecision Execution = forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop, true); assert(((Execution == FD_DidForwardTree) || (Execution == FD_DidForwardLeaf)) && "A previous positive assessment must also be executable"); if (Execution == FD_DidForwardTree) Stmt->removeSingleMemoryAccess(RA); return true; } /// Return which SCoP this instance is processing. Scop *getScop() const { return S; } /// Run the algorithm: Use value read accesses as operand tree roots and try /// to forward them into the statement. bool forwardOperandTrees() { for (ScopStmt &Stmt : *S) { bool StmtModified = false; // Because we are modifying the MemoryAccess list, collect them first to // avoid iterator invalidation. SmallVector Accs; for (MemoryAccess *RA : Stmt) { if (!RA->isRead()) continue; if (!RA->isLatestScalarKind()) continue; Accs.push_back(RA); } for (MemoryAccess *RA : Accs) { if (tryForwardTree(RA)) { Modified = true; StmtModified = true; NumForwardedTrees++; TotalForwardedTrees++; } } if (StmtModified) { NumModifiedStmts++; TotalModifiedStmts++; } } if (Modified) ScopsModified++; return Modified; } /// Print the pass result, performed transformations and the SCoP after the /// transformation. void print(raw_ostream &OS, int Indent = 0) { printStatistics(OS, Indent); if (!Modified) { // This line can easily be checked in regression tests. OS << "ForwardOpTree executed, but did not modify anything\n"; return; } printStatements(OS, Indent); } }; /// Pass that redirects scalar reads to array elements that are known to contain /// the same value. /// /// This reduces the number of scalar accesses and therefore potentially /// increases the freedom of the scheduler. In the ideal case, all reads of a /// scalar definition are redirected (We currently do not care about removing /// the write in this case). This is also useful for the main DeLICM pass as /// there are less scalars to be mapped. class ForwardOpTree : public ScopPass { private: /// The pass implementation, also holding per-scop data. std::unique_ptr Impl; public: static char ID; explicit ForwardOpTree() : ScopPass(ID) {} ForwardOpTree(const ForwardOpTree &) = delete; ForwardOpTree &operator=(const ForwardOpTree &) = delete; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive(); AU.addRequired(); AU.setPreservesAll(); } bool runOnScop(Scop &S) override { // Free resources for previous SCoP's computation, if not yet done. releaseMemory(); LoopInfo &LI = getAnalysis().getLoopInfo(); { IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); Impl = llvm::make_unique(&S, &LI, MaxOpGuard); if (AnalyzeKnown) { LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); Impl->computeKnownValues(); } LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); Impl->forwardOperandTrees(); if (MaxOpGuard.hasQuotaExceeded()) { LLVM_DEBUG(dbgs() << "Not all operations completed because of " "max_operations exceeded\n"); KnownOutOfQuota++; } } LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); LLVM_DEBUG(dbgs() << S); // Update statistics auto ScopStats = S.getStatistics(); NumValueWrites += ScopStats.NumValueWrites; NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; NumPHIWrites += ScopStats.NumPHIWrites; NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; NumSingletonWrites += ScopStats.NumSingletonWrites; NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; return false; } void printScop(raw_ostream &OS, Scop &S) const override { if (!Impl) return; assert(Impl->getScop() == &S); Impl->print(OS); } void releaseMemory() override { Impl.reset(); } }; // class ForwardOpTree char ForwardOpTree::ID; } // namespace ScopPass *polly::createForwardOpTreePass() { return new ForwardOpTree(); } INITIALIZE_PASS_BEGIN(ForwardOpTree, "polly-optree", "Polly - Forward operand tree", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(ForwardOpTree, "polly-optree", "Polly - Forward operand tree", false, false)