diff options
author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-09-29 23:47:21 +0000 |
---|---|---|
committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2015-09-29 23:47:21 +0000 |
commit | c1db67e218ae22ab626ba75a5f6329786ca84c63 (patch) | |
tree | ded107c00f99e63eb1775434621c98be669a8fd4 | |
parent | f6343d74eff61e909e05b3d0fc4ec7483e922e0c (diff) | |
download | bcm5719-llvm-c1db67e218ae22ab626ba75a5f6329786ca84c63.tar.gz bcm5719-llvm-c1db67e218ae22ab626ba75a5f6329786ca84c63.zip |
Identify and hoist definitively invariant loads
As a first step in the direction of assumed invariant loads (loads from
locations that are not written in some context) we now detect and hoist
definitively invariant loads. These invariant loads will be preloaded
in the code generation and used in the optimized version of the SCoP.
If the load is only conditionally executed the preloaded version will
also only be executed under the same condition, hence we will never
access memory that wouldn't have been accessed otherwise. This is also
the main feature that distinguishes this approach from LICM.
As hoisting can make statements empty we will simplify the SCoP and
remove empty statements that would otherwise cause artifacts in the
code generation.
Differential Revision: http://reviews.llvm.org/D13194
llvm-svn: 248861
23 files changed, 534 insertions, 155 deletions
diff --git a/polly/include/polly/CodeGen/IslNodeBuilder.h b/polly/include/polly/CodeGen/IslNodeBuilder.h index 616d5c3fc62..2b65c3e3ec2 100644 --- a/polly/include/polly/CodeGen/IslNodeBuilder.h +++ b/polly/include/polly/CodeGen/IslNodeBuilder.h @@ -42,6 +42,9 @@ public: void addParameters(__isl_take isl_set *Context); void create(__isl_take isl_ast_node *Node); + /// @brief Preload all memory loads that are invariant. + void preloadInvariantLoads(); + /// @brief Finalize code generation for the SCoP @p S. /// /// @see BlockGenerator::finalizeSCoP(Scop &S) @@ -190,6 +193,21 @@ protected: /// @param Mark The node we generate code for. virtual void createMark(__isl_take isl_ast_node *Marker); virtual void createFor(__isl_take isl_ast_node *For); + + /// @brief Preload the memory load access @p MA. + /// + /// If @p MA is not always executed it will be conditionally loaded and + /// merged with undef from the same type. Hence, if @p MA is executed only + /// under condition C then the preload code will look like this: + /// + /// MA_preload = undef; + /// if (C) + /// MA_preload = load MA; + /// use MA_preload + Value *preloadInvariantLoad(const MemoryAccess &MA, + __isl_take isl_set *Domain, + __isl_keep isl_ast_build *Build); + void createForVector(__isl_take isl_ast_node *For, int VectorWidth); void createForSequential(__isl_take isl_ast_node *For); diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index aa86d81e1fc..24bb9c5b1f1 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -127,6 +127,9 @@ public: /// @brief Destructor to free the isl id of the base pointer. ~ScopArrayInfo(); + /// @brief Set the base pointer to @p BP. + void setBasePtr(Value *BP) { BasePtr = BP; } + /// @brief Return the base pointer. 
Value *getBasePtr() const { return BasePtr; } @@ -690,6 +693,15 @@ public: llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, MemoryAccess::ReductionType RT); +/// @brief Ordered list type to hold accesses. +using MemoryAccessList = std::forward_list<MemoryAccess *>; + +/// @brief Type for invariant memory accesses and their domain context. +using InvariantAccessTy = std::pair<MemoryAccess *, isl_set *>; + +/// @brief Type for multiple invariant memory accesses and their domain context. +using InvariantAccessesTy = SmallVector<InvariantAccessTy, 8>; + ///===----------------------------------------------------------------------===// /// @brief Statement of the Scop /// @@ -700,9 +712,6 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, /// At the moment every statement represents a single basic block of LLVM-IR. class ScopStmt { public: - /// @brief List to hold all (scalar) memory accesses mapped to an instruction. - using MemoryAccessList = std::forward_list<MemoryAccess *>; - ScopStmt(const ScopStmt &) = delete; const ScopStmt &operator=(const ScopStmt &) = delete; @@ -880,6 +889,9 @@ public: /// @brief Return true if this statement represents a whole region. bool isRegionStmt() const { return R != nullptr; } + /// @brief Return true if this statement does not contain any accesses. + bool isEmpty() const { return MemAccs.empty(); } + /// @brief Return the (scalar) memory accesses for @p Inst. const MemoryAccessList &getAccessesFor(const Instruction *Inst) const { MemoryAccessList *MAL = lookupAccessesFor(Inst); @@ -913,6 +925,13 @@ public: BB = Block; } + /// @brief Move the memory access in @p InvMAs to @p TargetList. + /// + /// Note that scalar accesses that are caused by any access in @p InvMAs will + /// be eliminated too. 
+ void hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList); + typedef MemoryAccessVec::iterator iterator; typedef MemoryAccessVec::const_iterator const_iterator; @@ -1023,7 +1042,7 @@ private: /// Max loop depth. unsigned MaxLoopDepth; - typedef std::deque<ScopStmt> StmtSet; + typedef std::list<ScopStmt> StmtSet; /// The statements in this Scop. StmtSet Stmts; @@ -1130,6 +1149,9 @@ private: /// group to ensure the SCoP is executed in an alias free environment. MinMaxVectorPairVectorTy MinMaxAliasGroups; + /// @brief List of invariant accesses. + InvariantAccessesTy InvariantAccesses; + /// @brief Scop constructor; invoked from ScopInfo::buildScop. Scop(Region &R, AccFuncMapType &AccFuncMap, ScalarEvolution &SE, DominatorTree &DT, isl_ctx *ctx, unsigned MaxLoopDepth); @@ -1183,6 +1205,15 @@ private: /// @brief Add parameter constraints to @p C that imply a non-empty domain. __isl_give isl_set *addNonEmptyDomainConstraints(__isl_take isl_set *C) const; + /// @brief Simplify the SCoP representation + /// + /// At the moment we perform the following simplifications: + /// - removal of empty statements (due to invariant load hoisting) + void simplifySCoP(); + + /// @brief Hoist all invariant memory loads. + void hoistInvariantLoads(); + /// @brief Build the Context of the Scop. void buildContext(); @@ -1313,6 +1344,11 @@ public: /// @return The maximum depth of the loop. inline unsigned getMaxLoopDepth() const { return MaxLoopDepth; } + /// @brief Return the set of invariant accesses. + const InvariantAccessesTy &getInvariantAccesses() const { + return InvariantAccesses; + } + /// @brief Mark the SCoP as optimized by the scheduler. 
void markAsOptimized() { IsOptimized = true; } diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 232c03b077c..6f3b8520bf9 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -1350,6 +1350,46 @@ void ScopStmt::print(raw_ostream &OS) const { void ScopStmt::dump() const { print(dbgs()); } +void ScopStmt::hoistMemoryAccesses(MemoryAccessList &InvMAs, + InvariantAccessesTy &TargetList) { + + // Remove all memory accesses in @p InvMAs from this statement together + // with all scalar accesses that were caused by them. The tricky iteration + // order uses is needed because the MemAccs is a vector and the order in + // which the accesses of each memory access list (MAL) are stored in this + // vector is reversed. + for (MemoryAccess *MA : InvMAs) { + auto &MAL = *lookupAccessesFor(MA->getAccessInstruction()); + MAL.reverse(); + + auto MALIt = MAL.begin(); + auto MALEnd = MAL.end(); + auto MemAccsIt = MemAccs.begin(); + while (MALIt != MALEnd) { + while (*MemAccsIt != *MALIt) + MemAccsIt++; + + MALIt++; + MemAccs.erase(MemAccsIt); + } + + InstructionToAccess.erase(MA->getAccessInstruction()); + delete &MAL; + } + + // Get the context under which this statement, hence the memory accesses, are + // executed. 
+ isl_set *DomainCtx = isl_set_params(getDomain()); + DomainCtx = isl_set_remove_redundancies(DomainCtx); + DomainCtx = isl_set_detect_equalities(DomainCtx); + DomainCtx = isl_set_coalesce(DomainCtx); + + for (MemoryAccess *MA : InvMAs) + TargetList.push_back(std::make_pair(MA, isl_set_copy(DomainCtx))); + + isl_set_free(DomainCtx); +} + //===----------------------------------------------------------------------===// /// Scop class implement @@ -2268,6 +2308,9 @@ void Scop::init(LoopInfo &LI, ScopDetection &SD, AliasAnalysis &AA) { buildBoundaryContext(); simplifyContexts(); buildAliasChecks(AA); + + hoistInvariantLoads(); + simplifySCoP(); } Scop::~Scop() { @@ -2290,6 +2333,9 @@ Scop::~Scop() { isl_pw_multi_aff_free(MMA.second); } } + + for (const auto &IA : InvariantAccesses) + isl_set_free(IA.second); } void Scop::updateAccessDimensionality() { @@ -2298,6 +2344,81 @@ void Scop::updateAccessDimensionality() { Access->updateDimensionality(); } +void Scop::simplifySCoP() { + + for (auto StmtIt = Stmts.begin(), StmtEnd = Stmts.end(); StmtIt != StmtEnd;) { + ScopStmt &Stmt = *StmtIt; + + if (!StmtIt->isEmpty()) { + StmtIt++; + continue; + } + + if (Stmt.isRegionStmt()) + for (BasicBlock *BB : Stmt.getRegion()->blocks()) + StmtMap.erase(BB); + else + StmtMap.erase(Stmt.getBasicBlock()); + + StmtIt = Stmts.erase(StmtIt); + } +} + +void Scop::hoistInvariantLoads() { + isl_union_map *Writes = getWrites(); + for (ScopStmt &Stmt : *this) { + + // TODO: Loads that are not loop carried, hence are in a statement with + // zero iterators, are by construction invariant, though we + // currently "hoist" them anyway. 
+ + isl_set *Domain = Stmt.getDomain(); + MemoryAccessList InvMAs; + + for (MemoryAccess *MA : Stmt) { + if (MA->isImplicit() || MA->isWrite() || !MA->isAffine()) + continue; + + isl_map *AccessRelation = MA->getAccessRelation(); + if (isl_map_involves_dims(AccessRelation, isl_dim_in, 0, + Stmt.getNumIterators())) { + isl_map_free(AccessRelation); + continue; + } + + AccessRelation = + isl_map_intersect_domain(AccessRelation, isl_set_copy(Domain)); + isl_set *AccessRange = isl_map_range(AccessRelation); + + isl_union_map *Written = isl_union_map_intersect_range( + isl_union_map_copy(Writes), isl_union_set_from_set(AccessRange)); + bool IsWritten = !isl_union_map_is_empty(Written); + isl_union_map_free(Written); + + if (IsWritten) + continue; + + InvMAs.push_front(MA); + } + + // We inserted invariant accesses always in the front but need them to be + // sorted in a "natural order". The statements are already sorted in reverse + // post order and that suffices for the accesses too. The reason we require + // an order in the first place is the dependences between invariant loads + // that can be caused by indirect loads. + InvMAs.reverse(); + + // Transfer the memory access from the statement to the SCoP. 
+ Stmt.hoistMemoryAccesses(InvMAs, InvariantAccesses); + + isl_set_free(Domain); + } + isl_union_map_free(Writes); + + if (!InvariantAccesses.empty()) + IsOptimized = true; +} + const ScopArrayInfo * Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *AccessType, ArrayRef<const SCEV *> Sizes, bool IsPHI) { @@ -2478,6 +2599,12 @@ void Scop::print(raw_ostream &OS) const { << "\n"; OS.indent(4) << "Region: " << getNameStr() << "\n"; OS.indent(4) << "Max Loop Depth: " << getMaxLoopDepth() << "\n"; + OS.indent(4) << "Invariant Accesses: {\n"; + for (const auto &IA : InvariantAccesses) { + IA.first->print(OS); + OS.indent(12) << "Execution Context: " << IA.second << "\n"; + } + OS.indent(4) << "}\n"; printContext(OS.indent(4)); printArrayInfo(OS.indent(4)); printAliasAssumptions(OS); diff --git a/polly/lib/CodeGen/BlockGenerators.cpp b/polly/lib/CodeGen/BlockGenerators.cpp index bae65907a41..dc6c519ee18 100644 --- a/polly/lib/CodeGen/BlockGenerators.cpp +++ b/polly/lib/CodeGen/BlockGenerators.cpp @@ -108,6 +108,8 @@ Value *BlockGenerator::getNewValue(ScopStmt &Stmt, const Value *Old, return const_cast<Value *>(Old); if (Value *New = GlobalMap.lookup(Old)) { + if (Value *NewRemapped = GlobalMap.lookup(New)) + New = NewRemapped; if (Old->getType()->getScalarSizeInBits() < New->getType()->getScalarSizeInBits()) New = Builder.CreateTruncOrBitCast(New, Old->getType()); @@ -226,6 +228,9 @@ Loop *BlockGenerator::getLoopForInst(const llvm::Instruction *Inst) { Value *BlockGenerator::generateScalarLoad(ScopStmt &Stmt, const LoadInst *Load, ValueMapT &BBMap, LoopToScevMapT <S, isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) + return PreloadLoad; + const Value *Pointer = Load->getPointerOperand(); Value *NewPointer = generateLocationAccessed(Stmt, Load, Pointer, BBMap, LTS, NewAccesses); @@ -762,6 +767,12 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad( void VectorBlockGenerator::generateLoad( ScopStmt &Stmt, const LoadInst *Load, 
ValueMapT &VectorMap, VectorValueMapT &ScalarMaps, __isl_keep isl_id_to_ast_expr *NewAccesses) { + if (Value *PreloadLoad = GlobalMap.lookup(Load)) { + VectorMap[Load] = Builder.CreateVectorSplat(getVectorWidth(), PreloadLoad, + Load->getName() + "_p"); + return; + } + if (!VectorType::isValidElementType(Load->getType())) { for (int i = 0; i < getVectorWidth(); i++) ScalarMaps[i][Load] = diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 96d09387565..53aac25a3db 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -146,8 +146,9 @@ public: auto SplitBlock = StartBlock->getSinglePredecessor(); Builder.SetInsertPoint(SplitBlock->getTerminator()); NodeBuilder.addParameters(S.getContext()); + NodeBuilder.preloadInvariantLoads(); Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); - SplitBlock->getTerminator()->setOperand(0, RTC); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(StartBlock->begin()); NodeBuilder.create(AstRoot); diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 1574e6167f0..83121eed0a0 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -814,6 +814,123 @@ void IslNodeBuilder::create(__isl_take isl_ast_node *Node) { llvm_unreachable("Unknown isl_ast_node type"); } +/// @brief Create the actual preload memory access for @p MA. 
+static inline Value *createPreloadLoad(Scop &S, const MemoryAccess &MA, + isl_ast_build *Build, + IslExprBuilder &ExprBuilder) { + isl_set *AccessRange = isl_map_range(MA.getAccessRelation()); + isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange); + PWAccRel = isl_pw_multi_aff_gist_params(PWAccRel, S.getContext()); + isl_ast_expr *Access = + isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel); + return ExprBuilder.create(Access); +} + +Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, + isl_set *Domain, + isl_ast_build *Build) { + + isl_set *Universe = isl_set_universe(isl_set_get_space(Domain)); + bool AlwaysExecuted = isl_set_is_equal(Domain, Universe); + isl_set_free(Universe); + + if (AlwaysExecuted) { + isl_set_free(Domain); + return createPreloadLoad(S, MA, Build, ExprBuilder); + } else { + + isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); + + Value *Cond = ExprBuilder.create(DomainCond); + if (!Cond->getType()->isIntegerTy(1)) + Cond = Builder.CreateIsNotNull(Cond); + + BasicBlock *CondBB = SplitBlock(Builder.GetInsertBlock(), + Builder.GetInsertPoint(), &DT, &LI); + CondBB->setName("polly.preload.cond"); + + BasicBlock *MergeBB = SplitBlock(CondBB, CondBB->begin(), &DT, &LI); + MergeBB->setName("polly.preload.merge"); + + Function *F = Builder.GetInsertBlock()->getParent(); + LLVMContext &Context = F->getContext(); + BasicBlock *ExecBB = BasicBlock::Create(Context, "polly.preload.exec", F); + + DT.addNewBlock(ExecBB, CondBB); + if (Loop *L = LI.getLoopFor(CondBB)) + L->addBasicBlockToLoop(ExecBB, LI); + + auto *CondBBTerminator = CondBB->getTerminator(); + Builder.SetInsertPoint(CondBBTerminator); + Builder.CreateCondBr(Cond, ExecBB, MergeBB); + CondBBTerminator->eraseFromParent(); + + Builder.SetInsertPoint(ExecBB); + Builder.CreateBr(MergeBB); + + Builder.SetInsertPoint(ExecBB->getTerminator()); + Instruction *AccInst = MA.getAccessInstruction(); + Type *AccInstTy = AccInst->getType(); + Value 
*PreAccInst = createPreloadLoad(S, MA, Build, ExprBuilder); + + Builder.SetInsertPoint(MergeBB->getTerminator()); + auto *MergePHI = Builder.CreatePHI( + AccInstTy, 2, "polly.preload." + AccInst->getName() + ".merge"); + MergePHI->addIncoming(PreAccInst, ExecBB); + MergePHI->addIncoming(Constant::getNullValue(AccInstTy), CondBB); + + return MergePHI; + } +} + +void IslNodeBuilder::preloadInvariantLoads() { + + const auto &InvAccList = S.getInvariantAccesses(); + if (InvAccList.empty()) + return; + + const Region &R = S.getRegion(); + + BasicBlock *PreLoadBB = + SplitBlock(Builder.GetInsertBlock(), Builder.GetInsertPoint(), &DT, &LI); + PreLoadBB->setName("polly.preload.begin"); + Builder.SetInsertPoint(PreLoadBB->begin()); + + isl_ast_build *Build = + isl_ast_build_from_context(isl_set_universe(S.getParamSpace())); + + for (const auto &IA : InvAccList) { + MemoryAccess *MA = IA.first; + assert(!MA->isImplicit()); + + isl_set *Domain = isl_set_copy(IA.second); + Instruction *AccInst = MA->getAccessInstruction(); + Value *PreloadVal = preloadInvariantLoad(*MA, Domain, Build); + ValueMap[AccInst] = PreloadVal; + + if (SE.isSCEVable(AccInst->getType())) { + isl_id *ParamId = S.getIdForParam(SE.getSCEV(AccInst)); + if (ParamId) + IDToValue[ParamId] = PreloadVal; + isl_id_free(ParamId); + } + + SmallVector<Instruction *, 4> Users; + for (auto *U : AccInst->users()) + if (Instruction *UI = dyn_cast<Instruction>(U)) + if (!R.contains(UI)) + Users.push_back(UI); + for (auto *U : Users) + U->replaceUsesOfWith(AccInst, PreloadVal); + + auto *SAI = S.getScopArrayInfo(MA->getBaseAddr()); + for (auto *DerivedSAI : SAI->getDerivedSAIs()) + DerivedSAI->setBasePtr(PreloadVal); + } + + isl_ast_build_free(Build); +} + void IslNodeBuilder::addParameters(__isl_take isl_set *Context) { for (unsigned i = 0; i < isl_set_dim(Context, isl_dim_param); ++i) { diff --git a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll index 
ad956e6df43..69614599843 100644 --- a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -6,6 +6,7 @@ ; } ; ; CHECK: sext i32 %c to i64 +; CHECK: sext i32 %c to i64 ; CHECK: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 ; CHECK: %[[M1:[._a-zA-Z0-9]*]] = icmp sle i64 %[[M0]], 15 ; CHECK: %[[M2:[._a-zA-Z0-9]*]] = sext i32 %c to i64 @@ -23,7 +24,7 @@ ; CHECK: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i32* %[[AMax]], %[[BMin]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] -; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %1, %[[NoAlias]] +; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %3, %[[NoAlias]] ; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll index 42d555db2e3..ce59ad8fdf5 100644 --- a/polly/test/Isl/CodeGen/exprModDiv.ll +++ b/polly/test/Isl/CodeGen/exprModDiv.ll @@ -6,7 +6,7 @@ ; ; void exprModDiv(float *A, float *B, float *C, long N, long p) { ; for (long i = 0; i < N; i++) -; C[i] += A[i] + B[i] + A[p] + B[p]; +; C[i] += A[i] + B[i] + A[i] + B[i + p]; ; } ; ; @@ -32,21 +32,21 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %20 = sub nsw i64 0, %p -; CHECK: %21 = sub nsw i64 %20, 1 -; CHECK: %pexp.fdiv_q.0 = sub i64 %21, 127 +; CHECK: %17 = sub nsw i64 0, %p +; CHECK: %18 = sub nsw i64 %17, 1 +; CHECK: %pexp.fdiv_q.0 = sub i64 %18, 127 ; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 -; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %21, 0 -; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %21 +; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %18, 0 +; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %18 ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 -; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4 -; CHECK: %23 = add nsw i64 %p, %22 -; CHECK: %24 = add nsw i64 %23, 127 -; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; CHECK: %19 = mul nsw i64 127, %pexp.fdiv_q.4 +; CHECK: %20 = add nsw i64 %p, %19 +; CHECK: %21 = add nsw i64 %20, 127 +; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 @@ -58,17 +58,17 @@ ; #define floord(n,d) ((n < 0) ? 
(n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] -; POW2: %20 = sub nsw i64 0, %p -; POW2: %21 = sub nsw i64 %20, 1 -; POW2: %polly.fdiv_q.shr = ashr i64 %21, 7 -; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr -; POW2: %23 = add nsw i64 %p, %22 -; POW2: %24 = add nsw i64 %23, 128 -; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24 +; POW2: %17 = sub nsw i64 0, %p +; POW2: %18 = sub nsw i64 %17, 1 +; POW2: %polly.fdiv_q.shr = ashr i64 %18, 7 +; POW2: %19 = mul nsw i64 128, %polly.fdiv_q.shr +; POW2: %20 = add nsw i64 %p, %19 +; POW2: %21 = add nsw i64 %20, 128 +; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %21 ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B13 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -87,10 +87,11 @@ for.body: ; preds = %for.cond %arrayidx1 = getelementptr inbounds float, float* %B, i64 %i.0 %tmp1 = load float, float* %arrayidx1, align 4 %add = fadd float %tmp, %tmp1 - %arrayidx2 = getelementptr inbounds float, float* %A, i64 %p + %arrayidx2 = getelementptr inbounds float, float* %A, i64 %i.0 %tmp2 = load float, float* %arrayidx2, align 4 %add3 = fadd float %add, %tmp2 - %arrayidx4 = getelementptr inbounds float, float* %B, i64 %p + %padd = add nsw i64 %p, %i.0 + %arrayidx4 = getelementptr inbounds float, float* %B, i64 %padd %tmp3 = load float, float* %arrayidx4, align 4 %add5 = fadd float %add3, %tmp3 %arrayidx6 = getelementptr inbounds float, float* %C, i64 %i.0 diff --git a/polly/test/Isl/CodeGen/invariant_load.ll b/polly/test/Isl/CodeGen/invariant_load.ll new file mode 100644 index 00000000000..0e03614f430 --- /dev/null +++ b/polly/test/Isl/CodeGen/invariant_load.ll @@ -0,0 +1,39 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-no-early-exit -polly-codegen -S < %s | FileCheck %s +; +; CHECK-LABEL: 
polly.preload.begin: +; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; +; CHECK-LABEL: polly.stmt.bb2: +; CHECK-NEXT: %scevgep = getelementptr i32, i32* %A, i64 %polly.indvar +; CHECK-NEXT: store i32 %polly.access.B.load, i32* %scevgep, align 4 +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} diff --git a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll index e880a3ecf98..49d5e82e333 100644 --- a/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll +++ b/polly/test/Isl/CodeGen/non-affine-phi-node-expansion.ll @@ -4,6 +4,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" %struct.wombat = type {[4 x i32]} +; CHECK: polly.preload.begin: +; CHECK-NEXT: %polly.access.B = getelementptr i32, i32* %B, i64 0 +; CHECK-NEXT: %polly.access.B.load = load i32, i32* %polly.access.B +; CHECK-NOT: %polly.access.B.load = load i32, i32* %polly.access.B + ; CHECK: polly.stmt.bb3.entry: ; preds = %polly.start ; CHECK: br label %polly.stmt.bb3 @@ -14,8 +19,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; CHECK: br label %polly.stmt.bb13.exit ; CHECK: polly.stmt.bb5: ; preds = %polly.stmt.bb3 -; CHECK: %tmp7_p_scalar_ = load i32, i32* %B, 
!alias.scope !0, !noalias !2 -; CHECK: store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg1, !alias.scope !3, !noalias !4 +; CHECK: store i32 %polly.access.B.load, i32* %polly.access.cast.arg2 ; CHECK: br label %polly.stmt.bb13.exit ; Function Attrs: nounwind uwtable diff --git a/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll b/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll deleted file mode 100644 index 0e9d2a70af7..00000000000 --- a/polly/test/Isl/CodeGen/phi_in_exit_early_lnt_failure_4.ll +++ /dev/null @@ -1,62 +0,0 @@ -; RUN: opt %loadPolly -disable-basicaa -polly-detect-unprofitable -polly-codegen -polly-no-early-exit -S < %s | FileCheck %s -; -; This caused an lnt crash at some point, just verify it will run through and -; produce the PHI node in the exit we are looking for. -; -; CHECK-LABEL: polly.merge_new_and_old: -; CHECK-NEXT: %.merge = phi %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* [ %.final_reload, %polly.stmt.for.end.298 ], [ %13, %for.end.298 ] -; -%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8**, i8**, i32, i32***, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [9 x [16 x [16 x i16]]], [5 x [16 x [16 x i16]]], [9 x [8 x [8 x i16]]], [2 x [4 x [16 x [16 x i16]]]], [16 x [16 x i16]], [16 x [16 x i32]], i32****, i32***, i32***, i32***, i32****, i32****, %struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823*, 
%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32*, i32*, i32, i32, i32, i32, [4 x [4 x i32]], i32, i32, i32, i32, i32, double, i32, i32, i32, i32, i16******, i16******, i16******, i16******, [15 x i16], i32, i32, i32, i32, i32, i32, i32, i32, [6 x [32 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [1 x i32], i32, i32, [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, double**, double***, i32***, double**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [3 x [2 x i32]], [2 x i32], i32, i32, i16, i32, i32, i32, i32, i32 } -%struct.Picture.8.32.56.80.104.320.536.752.1016.1040.1184.1232.1352.1376.1400.1424.1496.1568.1664.1736.1832.2048.2120.2336.2384.2840.2864.2888.2912.3584.3800.3823 = type { i32, i32, [100 x %struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822*], i32, float, float, float } -%struct.Slice.7.31.55.79.103.319.535.751.1015.1039.1183.1231.1351.1375.1399.1423.1495.1567.1663.1735.1831.2047.2119.2335.2383.2839.2863.2887.2911.3583.3799.3822 = type { i32, i32, i32, i32, i32, i32, %struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818*, 
%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820*, %struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821*, i32, i32*, i32*, i32*, i32, i32*, i32*, i32*, i32 (i32)*, [3 x [2 x i32]] } -%struct.datapartition.3.27.51.75.99.315.531.747.1011.1035.1179.1227.1347.1371.1395.1419.1491.1563.1659.1731.1827.2043.2115.2331.2379.2835.2859.2883.2907.3579.3795.3818 = type { %struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816*, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817, %struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 } -%struct.Bitstream.1.25.49.73.97.313.529.745.1009.1033.1177.1225.1345.1369.1393.1417.1489.1561.1657.1729.1825.2041.2113.2329.2377.2833.2857.2881.2905.3577.3793.3816 = type { i32, i32, i8, i32, i32, i8, i8, i32, i32, i8*, i32 } -%struct.EncodingEnvironment.2.26.50.74.98.314.530.746.1010.1034.1178.1226.1346.1370.1394.1418.1490.1562.1658.1730.1826.2042.2114.2330.2378.2834.2858.2882.2906.3578.3794.3817 = type { i32, i32, i32, i32, i32, i8*, i32*, i32, i32 } -%struct.MotionInfoContexts.5.29.53.77.101.317.533.749.1013.1037.1181.1229.1349.1373.1397.1421.1493.1565.1661.1733.1829.2045.2117.2333.2381.2837.2861.2885.2909.3581.3797.3820 = type { [3 x [11 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [9 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [10 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [2 x [6 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819] } -%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819 = type { i16, i8, i64 } -%struct.TextureInfoContexts.6.30.54.78.102.318.534.750.1014.1038.1182.1230.1350.1374.1398.1422.1494.1566.1662.1734.1830.2046.2118.2334.2382.2838.2862.2886.2910.3582.3798.3821 = type { [2 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819], [3 x [4 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [4 x 
%struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [5 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]], [10 x [15 x %struct.BiContextType.4.28.52.76.100.316.532.748.1012.1036.1180.1228.1348.1372.1396.1420.1492.1564.1660.1732.1828.2044.2116.2332.2380.2836.2860.2884.2908.3580.3796.3819]] } -%struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824 = type { i32, i32, i32, [2 x i32], i32, [8 x i32], %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, %struct.macroblock.9.33.57.81.105.321.537.753.1017.1041.1185.1233.1353.1377.1401.1425.1497.1569.1665.1737.1833.2049.2121.2337.2385.2841.2865.2889.2913.3585.3801.3824*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } -%struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825 = type { i32, i32, i32, i32, i32, %struct.DecRefPicMarking_s.10.34.58.82.106.322.538.754.1018.1042.1186.1234.1354.1378.1402.1426.1498.1570.1666.1738.1834.2050.2122.2338.2386.2842.2866.2890.2914.3586.3802.3825* } - -@img = external global %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, align 8 - -; Function Attrs: nounwind uwtable -define void @intrapred_luma() #0 { -entry: - %PredPel = alloca [13 x i16], align 16 - br label %for.body - -for.body: ; preds = %for.body, %entry - br i1 undef, label %for.body, label %for.body.262 - -for.body.262: ; preds = %for.body - %0 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.280 - -for.body.280: ; preds = %for.body.280, %for.body.262 - %indvars.iv66 = phi i64 [ 0, %for.body.262 ], [ %indvars.iv.next67, %for.body.280 ] - %arrayidx282 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 1 - %arrayidx283 = getelementptr inbounds i16, i16* %arrayidx282, i64 %indvars.iv66 - %1 = load i16, i16* %arrayidx283, align 2 - %arrayidx289 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, 
%struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %0, i64 0, i32 47, i64 0, i64 2, i64 %indvars.iv66 - store i16 %1, i16* %arrayidx289, align 2 - %indvars.iv.next67 = add nuw nsw i64 %indvars.iv66, 1 - br i1 false, label %for.body.280, label %for.end.298 - -for.end.298: ; preds = %for.body.280 - %2 = load %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826*, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826** @img, align 8 - br label %for.body.310 - -for.body.310: ; preds = %for.body.310, %for.end.298 - %indvars.iv = phi i64 [ 0, %for.end.298 ], [ %indvars.iv.next, %for.body.310 ] - %arrayidx312 = getelementptr inbounds [13 x i16], [13 x i16]* %PredPel, i64 0, i64 9 - %arrayidx313 = getelementptr inbounds i16, i16* %arrayidx312, i64 %indvars.iv - %3 = load i16, i16* %arrayidx313, align 2 - %arrayidx322 = getelementptr inbounds %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826, %struct.ImageParameters.11.35.59.83.107.323.539.755.1019.1043.1187.1235.1355.1379.1403.1427.1499.1571.1667.1739.1835.2051.2123.2339.2387.2843.2867.2891.2915.3587.3803.3826* %2, i64 0, i32 47, i64 1, i64 %indvars.iv, i64 1 - store i16 %3, i16* %arrayidx322, align 2 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br i1 false, label %for.body.310, label %for.end.328 - -for.end.328: ; preds = %for.body.310 - ret void -} diff --git a/polly/test/Isl/CodeGen/simple_vec_call.ll b/polly/test/Isl/CodeGen/simple_vec_call.ll index a7b5d6bdbcf..721f3c31abe 100644 --- a/polly/test/Isl/CodeGen/simple_vec_call.ll +++ 
b/polly/test/Isl/CodeGen/simple_vec_call.ll @@ -24,16 +24,10 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %3) [[NUW]] +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float @foo(float %.load) [[NUW]] ; CHECK: %4 = insertelement <4 x float> undef, float [[RES1]], i32 0 ; CHECK: %5 = insertelement <4 x float> %4, float [[RES2]], i32 1 ; CHECK: %6 = insertelement <4 x float> %5, float [[RES3]], i32 2 diff --git a/polly/test/Isl/CodeGen/simple_vec_call_2.ll b/polly/test/Isl/CodeGen/simple_vec_call_2.ll index 0b0a5128df2..d87a96e0996 100644 --- a/polly/test/Isl/CodeGen/simple_vec_call_2.ll +++ b/polly/test/Isl/CodeGen/simple_vec_call_2.ll @@ -24,19 +24,13 @@ return: ret void } -; CHECK: %value_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = extractelement <4 x float> 
%value_p_splat, i32 0 -; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1 -; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2 -; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3 -; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %0) [[NUW:#[0-9]+]] -; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %1) [[NUW]] -; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %2) [[NUW]] -; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %3) [[NUW]] -; CHECK: %4 = insertelement <4 x float**> undef, float** %p_result, i32 0 -; CHECK: %5 = insertelement <4 x float**> %4, float** %p_result1, i32 1 -; CHECK: %6 = insertelement <4 x float**> %5, float** %p_result2, i32 2 -; CHECK: %7 = insertelement <4 x float**> %6, float** %p_result3, i32 3 -; CHECK: store <4 x float**> %7, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align +; CHECK: [[RES1:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW:#[0-9]+]] +; CHECK: [[RES2:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES3:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: [[RES4:%[a-zA-Z0-9_]+]] = tail call float** @foo(float %.load) [[NUW]] +; CHECK: %0 = insertelement <4 x float**> undef, float** %p_result, i32 0 +; CHECK: %1 = insertelement <4 x float**> %0, float** %p_result1, i32 1 +; CHECK: %2 = insertelement <4 x float**> %1, float** %p_result2, i32 2 +; CHECK: %3 = insertelement <4 x float**> %2, float** %p_result3, i32 3 +; CHECK: store <4 x float**> %3, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align ; CHECK: attributes [[NUW]] = { nounwind } diff --git a/polly/test/Isl/CodeGen/simple_vec_cast.ll b/polly/test/Isl/CodeGen/simple_vec_cast.ll index e7501a1b83b..a1f7e32b435 100644 --- a/polly/test/Isl/CodeGen/simple_vec_cast.ll +++ b/polly/test/Isl/CodeGen/simple_vec_cast.ll @@ -28,8 +28,10 @@ bb4: ; preds = %bb1 ret void } -; CHECK: 
%tmp_p_splat_one = load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8, !alias.scope !0, !noalias !2 -; CHECK: %tmp_p_splat = shufflevector <1 x float> %tmp_p_splat_one, <1 x float> %tmp_p_splat_one, <4 x i32> zeroinitializer -; CHECK: %0 = fpext <4 x float> %tmp_p_splat to <4 x double> -; CHECK: store <4 x double> %0, <4 x double>* bitcast ([1024 x double]* @B to <4 x double>*), align 8, !alias.scope !3, !noalias !4 +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) +; CHECK: polly.stmt.bb2: ; preds = %polly.start +; CHECK: %tmp_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %tmp_p.splat = shufflevector <4 x float> %tmp_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer +; CHECK: %0 = fpext <4 x float> %tmp_p.splat to <4 x double> +; CHECK: store <4 x double> %0, <4 x double>* diff --git a/polly/test/Isl/CodeGen/simple_vec_const.ll b/polly/test/Isl/CodeGen/simple_vec_const.ll index b59b829dc0b..e99303df6ce 100644 --- a/polly/test/Isl/CodeGen/simple_vec_const.ll +++ b/polly/test/Isl/CodeGen/simple_vec_const.ll @@ -52,5 +52,8 @@ define i32 @main() nounwind { } -; CHECK: load <1 x float>, <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*) -; CHECK: shufflevector <1 x float> {{.*}}, <1 x float> {{.*}} <4 x i32> zeroinitializer +; CHECK: %.load = load float, float* getelementptr inbounds ([1024 x float], [1024 x float]* @A, i32 0, i32 0) + +; CHECK: polly.stmt.: ; preds = %polly.start +; CHECK: %_p.splatinsert = insertelement <4 x float> undef, float %.load, i32 0 +; CHECK: %_p.splat = shufflevector <4 x float> %_p.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer diff --git a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll index 343c017f40b..fe558527e55 100644 --- a/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll +++ b/polly/test/Isl/CodeGen/simple_vec_ptr_ptr_ty.ll @@ 
-22,6 +22,9 @@ body: return: ret void } -; CHECK: %value_p_splat_one = load <1 x float**>, <1 x float**>* bitcast ([1024 x float**]* @A to <1 x float**>*), align 8 -; CHECK: %value_p_splat = shufflevector <1 x float**> %value_p_splat_one, <1 x float**> %value_p_splat_one, <4 x i32> zeroinitializer -; CHECK: store <4 x float**> %value_p_splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 +; CHECK: %.load = load float**, float*** getelementptr inbounds ([1024 x float**], [1024 x float**]* @A, i32 0, i32 0) + +; CHECK-NOT: load <1 x float**> +; CHECK: %value_p.splatinsert = insertelement <4 x float**> undef, float** %.load, i32 0 +; CHECK: %value_p.splat = shufflevector <4 x float**> %value_p.splatinsert, <4 x float**> undef, <4 x i32> zeroinitializer +; CHECK: store <4 x float**> %value_p.splat, <4 x float**>* bitcast ([1024 x float**]* @B to <4 x float**>*), align 8 diff --git a/polly/test/Isl/CodeGen/two-scops-in-row.ll b/polly/test/Isl/CodeGen/two-scops-in-row.ll index 4fda7d40978..c2552bf28a1 100644 --- a/polly/test/Isl/CodeGen/two-scops-in-row.ll +++ b/polly/test/Isl/CodeGen/two-scops-in-row.ll @@ -21,6 +21,7 @@ entry: for.0: %Scalar0.val = load i32, i32* %Scalar0 + store i32 1, i32* %Scalar0 br i1 false, label %for.0, label %for.1.preheader for.1.preheader: diff --git a/polly/test/ScopInfo/inter_bb_scalar_dep.ll b/polly/test/ScopInfo/inter_bb_scalar_dep.ll index f8d91fba5c4..d7a35d598a5 100644 --- a/polly/test/ScopInfo/inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/inter_bb_scalar_dep.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -25,11 +29,7 @@ for.i: ; preds = %for.i.end, 
%entry entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next diff --git a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll index 3766e17d136..f2ac3db8183 100644 --- a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -14,7 +14,12 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" -; Function Attrs: nounwind +; CHECK: Invariant Accesses: { +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: MemRef_init_ptr[0] +; CHECK: } define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -26,23 +31,17 @@ for.i: ; preds = %for.i.end, %entry entry.next: ; preds = %for.i %init = load i64, i64* %init_ptr -; CHECK-LABEL: Stmt_entry_next -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; +; CHECK-NOT: Stmt_entry_next br label %for.j for.j: ; preds = %for.j, %entry.next %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] %init_2 = load i64, i64* %init_ptr %init_sum = add i64 %init, %init_2 -; CHECK-LABEL: Stmt_for_j +; CHECK: Stmt_for_j ; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, 
i1] -> MemRef_init[] }; -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_sum, i64* %scevgep diff --git a/polly/test/ScopInfo/intra_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_bb_scalar_dep.ll index 5476077511f..446a8a2714d 100644 --- a/polly/test/ScopInfo/intra_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_bb_scalar_dep.ll @@ -14,6 +14,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; Function Attrs: nounwind +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) #0 { entry: br label %for.i @@ -32,11 +35,12 @@ for.j: ; preds = %for.j, %entry.next %init_plus_two = add i64 %init, 2 %scevgep = getelementptr i64, i64* %A, i64 %indvar.j store i64 %init_plus_two, i64* %scevgep -; CHECK-LABEL: Stmt_for_j -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_j +; CHECK-NOT: ReadAccess +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_A[i1] }; +; CHECK-NEXT: } %indvar.j.next = add nsw i64 %indvar.j, 1 %exitcond.j = icmp eq i64 %indvar.j.next, %N br i1 %exitcond.j, label %for.i.end, label %for.j diff --git a/polly/test/ScopInfo/invariant_load.ll 
b/polly/test/ScopInfo/invariant_load.ll new file mode 100644 index 00000000000..3ad9dd8486f --- /dev/null +++ b/polly/test/ScopInfo/invariant_load.ll @@ -0,0 +1,35 @@ +; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_B[0] }; +; +; void f(int *restrict A, int *restrict B) { +; for (int i = 0; i < 1024; i++) +; A[i] = *B; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %A, i32* noalias %B) { +bb: + br label %bb1 + +bb1: ; preds = %bb4, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb4 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb5 + +bb2: ; preds = %bb1 + %tmp = load i32, i32* %B, align 4 + %tmp3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp, i32* %tmp3, align 4 + br label %bb4 + +bb4: ; preds = %bb2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb5: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll b/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll new file mode 100644 index 00000000000..9d31c28bb90 --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_base_pointer_in_conditional.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-scops -polly-ignore-aliasing -polly-detect-unprofitable -analyze < %s | FileCheck %s +; +; CHECK: Invariant Accesses: +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_bb5[i0] -> MemRef_BP[0] }; +; CHECK-NEXT: Execution Context: [N] -> { : N >= 514 } +; +; void f(int *BP, int *A, int N) { +; for (int i = 0; i < N; i++) +; if (i > 512) +; A[i] = *BP; +; else +; A[i] = 0; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %BP, i32* %A, i32 %N) { +bb: + 
%tmp = sext i32 %N to i64 + br label %bb1 + +bb1: ; preds = %bb11, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb11 ], [ 0, %bb ] + %tmp2 = icmp slt i64 %indvars.iv, %tmp + br i1 %tmp2, label %bb3, label %bb12 + +bb3: ; preds = %bb1 + %tmp4 = icmp sgt i64 %indvars.iv, 512 + br i1 %tmp4, label %bb5, label %bb8 + +bb5: ; preds = %bb3 + %tmp9a = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %inv = load i32, i32 *%BP + store i32 %inv, i32* %tmp9a, align 4 + br label %bb10 + +bb8: ; preds = %bb3 + %tmp9b = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp9b, align 4 + br label %bb10 + +bb10: ; preds = %bb8, %bb5 + br label %bb11 + +bb11: ; preds = %bb10 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb12: ; preds = %bb1 + ret void +} diff --git a/polly/test/ScopInfo/tempscop-printing.ll b/polly/test/ScopInfo/tempscop-printing.ll index 52027fef257..c537a291076 100644 --- a/polly/test/ScopInfo/tempscop-printing.ll +++ b/polly/test/ScopInfo/tempscop-printing.ll @@ -14,6 +14,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" ; CHECK-LABEL: Function: f +; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } + define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -24,12 +28,8 @@ for.i: br label %entry.next entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: @@ -55,6 +55,9 @@ return: } ; CHECK-LABEL: Function: g +; CHECK: ReadAccess := 
[Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: MemRef_init_ptr[0] +; CHECK-NEXT: Execution Context: [N] -> { : N >= 1 or N <= -1 } define void @g(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { entry: br label %for.i @@ -65,12 +68,8 @@ for.i: br label %entry.next entry.next: -; CHECK: Stmt_entry_next +; CHECK-NOT: Stmt_entry_next %init = load i64, i64* %init_ptr -; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init_ptr[0] }; -; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] -; CHECK-NEXT: [N] -> { Stmt_entry_next[i0] -> MemRef_init[] }; br label %for.j for.j: |