diff options
18 files changed, 18 insertions, 1010 deletions
diff --git a/polly/include/polly/LinkAllPasses.h b/polly/include/polly/LinkAllPasses.h index df449997384..18379be6b62 100644 --- a/polly/include/polly/LinkAllPasses.h +++ b/polly/include/polly/LinkAllPasses.h @@ -33,7 +33,6 @@ llvm::Pass *createDOTOnlyPrinterPass(); llvm::Pass *createDOTOnlyViewerPass(); llvm::Pass *createDOTPrinterPass(); llvm::Pass *createDOTViewerPass(); -llvm::Pass *createIndependentBlocksPass(); llvm::Pass *createJSONExporterPass(); llvm::Pass *createJSONImporterPass(); llvm::Pass *createPollyCanonicalizePass(); @@ -43,7 +42,6 @@ llvm::Pass *createIslAstInfoPass(); llvm::Pass *createCodeGenerationPass(); llvm::Pass *createIslScheduleOptimizerPass(); -extern char &IndependentBlocksID; extern char &CodePreparationID; } @@ -64,7 +62,6 @@ struct PollyForcePassLinking { polly::createDOTOnlyViewerPass(); polly::createDOTPrinterPass(); polly::createDOTViewerPass(); - polly::createIndependentBlocksPass(); polly::createJSONExporterPass(); polly::createJSONImporterPass(); polly::createScopDetectionPass(); @@ -81,7 +78,6 @@ namespace llvm { class PassRegistry; void initializeCodePreparationPass(llvm::PassRegistry &); void initializeDeadCodeElimPass(llvm::PassRegistry &); -void initializeIndependentBlocksPass(llvm::PassRegistry &); void initializeJSONExporterPass(llvm::PassRegistry &); void initializeJSONImporterPass(llvm::PassRegistry &); void initializeIslAstInfoPass(llvm::PassRegistry &); diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index e69d0b98eff..f513c81e198 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -648,10 +648,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst, // Check that the base address of the access is invariant in the current // region. if (!isInvariant(*BaseValue, CurRegion)) - // Verification of this property is difficult as the independent blocks - // pass may introduce aliasing that we did not have when running the - // scop detection. - return invalid<ReportVariantBasePtr>(Context, /*Assert=*/false, BaseValue, + return invalid<ReportVariantBasePtr>(Context, /*Assert=*/true, BaseValue, &Inst); AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); @@ -684,8 +681,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst, AccessFunction, &Inst, BaseValue); } - // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions - // created by IndependentBlocks Pass. + // FIXME: Think about allowing IntToPtrInst if (IntToPtrInst *Inst = dyn_cast<IntToPtrInst>(BaseValue)) return invalid<ReportIntToPtr>(Context, /*Assert=*/true, Inst); @@ -699,13 +695,6 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst, AliasSet &AS = Context.AST.getAliasSetForPointer( BaseValue, MemoryLocation::UnknownSize, AATags); - // INVALID triggers an assertion in verifying mode, if it detects that a - // SCoP was detected by SCoP detection and that this SCoP was invalidated by - // a pass that stated it would preserve the SCoPs. We disable this check as - // the independent blocks pass may create memory references which seem to - // alias, if -basicaa is not available. They actually do not, but as we can - // not proof this without -basicaa we would fail. We disable this check to - // not cause irrelevant verification failures. if (!AS.isMustAlias()) { if (PollyUseRuntimeAliasChecks) { bool CanBuildRunTimeCheck = true; @@ -731,7 +720,7 @@ bool ScopDetection::isValidMemoryAccess(Instruction &Inst, if (CanBuildRunTimeCheck) return true; } - return invalid<ReportAlias>(Context, /*Assert=*/false, &Inst, AS); + return invalid<ReportAlias>(Context, /*Assert=*/true, &Inst, AS); } return true; diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 92a495089ad..7320a73ff8e 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3651,7 +3651,6 @@ ScopInfo::~ScopInfo() { } void ScopInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequiredID(IndependentBlocksID); AU.addRequired<LoopInfoWrapperPass>(); AU.addRequired<RegionInfoPass>(); AU.addRequired<DominatorTreeWrapperPass>(); diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt index beba00d3685..fe51bd47d9a 100644 --- a/polly/lib/CMakeLists.txt +++ b/polly/lib/CMakeLists.txt @@ -49,7 +49,6 @@ add_polly_library(Polly Transform/Canonicalization.cpp Transform/CodePreparation.cpp Transform/DeadCodeElimination.cpp - Transform/IndependentBlocks.cpp Transform/ScheduleOptimizer.cpp ${POLLY_HEADER_FILES} ) diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index 7b92de0d9d2..4ea9db338bc 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -192,7 +192,6 @@ public: // region tree. AU.addPreserved<RegionInfoPass>(); AU.addPreserved<ScopInfo>(); - AU.addPreservedID(IndependentBlocksID); } }; } diff --git a/polly/lib/Makefile b/polly/lib/Makefile index 6a3b077f67e..e267f001138 100644 --- a/polly/lib/Makefile +++ b/polly/lib/Makefile @@ -135,7 +135,6 @@ SOURCES= Polly.cpp \ Transform/Canonicalization.cpp \ Transform/CodePreparation.cpp \ Transform/DeadCodeElimination.cpp \ - Transform/IndependentBlocks.cpp \ Transform/ScheduleOptimizer.cpp \ ${GPGPU_FILES} \ ${ISL_CODEGEN_FILES} \ diff --git a/polly/lib/Support/RegisterPasses.cpp b/polly/lib/Support/RegisterPasses.cpp index 895af356d98..c6bbb77e705 100644 --- a/polly/lib/Support/RegisterPasses.cpp +++ b/polly/lib/Support/RegisterPasses.cpp @@ -146,7 +146,6 @@ void initializePollyPasses(PassRegistry &Registry) { initializeCodePreparationPass(Registry); initializeDeadCodeElimPass(Registry); initializeDependenceInfoPass(Registry); - initializeIndependentBlocksPass(Registry); initializeJSONExporterPass(Registry); initializeJSONImporterPass(Registry); initializeIslAstInfoPass(Registry); diff --git a/polly/lib/Transform/IndependentBlocks.cpp b/polly/lib/Transform/IndependentBlocks.cpp deleted file mode 100644 index 1daf9ed9f04..00000000000 --- a/polly/lib/Transform/IndependentBlocks.cpp +++ /dev/null @@ -1,373 +0,0 @@ -//===------ IndependentBlocks.cpp - Create Independent Blocks in Regions --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Create independent blocks in the regions detected by ScopDetection. -// -//===----------------------------------------------------------------------===// -// -#include "polly/LinkAllPasses.h" -#include "polly/Options.h" -#include "polly/ScopDetection.h" -#include "polly/Support/ScopHelper.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/BasicAliasAnalysis.h" -#include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/PostDominators.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Transforms/Utils/Local.h" -#include <vector> - -using namespace polly; -using namespace llvm; - -#define DEBUG_TYPE "polly-independent" - -namespace { -struct IndependentBlocks : public FunctionPass { - RegionInfo *RI; - ScalarEvolution *SE; - ScopDetection *SD; - LoopInfo *LI; - - BasicBlock *AllocaBlock; - - static char ID; - - IndependentBlocks() : FunctionPass(ID) {} - - // Create new code for every instruction operator that can be expressed by a - // SCEV. Like this there are just two types of instructions left: - // - // 1. Instructions that only reference loop ivs or parameters outside the - // region. - // - // 2. Instructions that are not used for any memory modification. (These - // will be ignored later on.) - // - // Blocks containing only these kind of instructions are called independent - // blocks as they can be scheduled arbitrarily. - bool createIndependentBlocks(BasicBlock *BB, const Region *R); - bool createIndependentBlocks(const Region *R); - - // Elimination on the Scop to eliminate the scalar dependences come with - // trivially dead instructions. - bool eliminateDeadCode(const Region *R); - - //===--------------------------------------------------------------------===// - /// Non trivial scalar dependences checking functions. - /// Non trivial scalar dependences occur when the def and use are located in - /// different BBs and we can not move them into the same one. This will - /// prevent use from schedule BBs arbitrarily. - /// - /// @brief This function checks if a scalar value that is part of the - /// Scop is used outside of the Scop. - /// - /// @param Use The use of the instruction. - /// @param R The maximum region in the Scop. - /// - /// @return Return true if the Use of an instruction and the instruction - /// itself form a non trivial scalar dependence. - static bool isEscapeUse(const Value *Use, const Region *R); - - //===--------------------------------------------------------------------===// - /// Operand tree moving functions. - /// Trivial scalar dependences can eliminate by move the def to the same BB - /// that containing use. - /// - /// @brief Check if the instruction can be moved to another place safely. - /// - /// @param Inst The instruction. - /// - /// @return Return true if the instruction can be moved safely, false - /// otherwise. - static bool isSafeToMove(Instruction *Inst); - - typedef std::map<Instruction *, Instruction *> ReplacedMapType; - - /// @brief Move all safe to move instructions in the Operand Tree (DAG) to - /// eliminate trivial scalar dependences. - /// - /// @param Inst The root of the operand Tree. - /// @param R The maximum region in the Scop. - /// @param ReplacedMap The map that mapping original instruction to the moved - /// instruction. - /// @param InsertPos The insert position of the moved instructions. - void moveOperandTree(Instruction *Inst, const Region *R, - ReplacedMapType &ReplacedMap, Instruction *InsertPos); - - bool isIndependentBlock(const Region *R, BasicBlock *BB) const; - bool areAllBlocksIndependent(const Region *R) const; - - bool runOnFunction(Function &F); - void verifyAnalysis() const; - void verifyScop(const Region *R) const; - void getAnalysisUsage(AnalysisUsage &AU) const; -}; -} - -bool IndependentBlocks::isSafeToMove(Instruction *Inst) { - if (Inst->mayReadFromMemory() || Inst->mayWriteToMemory()) - return false; - - return isSafeToSpeculativelyExecute(Inst); -} - -void IndependentBlocks::moveOperandTree(Instruction *Inst, const Region *R, - ReplacedMapType &ReplacedMap, - Instruction *InsertPos) { - BasicBlock *CurBB = Inst->getParent(); - - // Depth first traverse the operand tree (or operand dag, because we will - // stop at PHINodes, so there are no cycle). - typedef Instruction::op_iterator ChildIt; - std::vector<std::pair<Instruction *, ChildIt>> WorkStack; - - WorkStack.push_back(std::make_pair(Inst, Inst->op_begin())); - DenseSet<Instruction *> VisitedSet; - - while (!WorkStack.empty()) { - Instruction *CurInst = WorkStack.back().first; - ChildIt It = WorkStack.back().second; - DEBUG(dbgs() << "Checking Operand of Node:\n" << *CurInst << "\n------>\n"); - if (It == CurInst->op_end()) { - // Insert the new instructions in topological order. - if (!CurInst->getParent()) { - CurInst->insertBefore(InsertPos); - SE->forgetValue(CurInst); - } - - WorkStack.pop_back(); - } else { - // for each node N, - Instruction *Operand = dyn_cast<Instruction>(*It); - ++WorkStack.back().second; - - // Can not move no instruction value. - if (Operand == 0) - continue; - - DEBUG(dbgs() << "For Operand:\n" << *Operand << "\n--->"); - - // If the Scop Region does not contain N, skip it and all its operands and - // continue: because we reach a "parameter". - // FIXME: we must keep the predicate instruction inside the Scop, - // otherwise it will be translated to a load instruction, and we can not - // handle load as affine predicate at this moment. - if (!R->contains(Operand) && !isa<TerminatorInst>(CurInst)) { - DEBUG(dbgs() << "Out of region.\n"); - continue; - } - - if (canSynthesize(Operand, LI, SE, R)) { - DEBUG(dbgs() << "is IV.\n"); - continue; - } - - // We can not move the operand, a non trivial scalar dependence found! - if (!isSafeToMove(Operand)) { - DEBUG(dbgs() << "Can not move!\n"); - continue; - } - - // Do not need to move instruction if it is contained in the same BB with - // the root instruction. - if (Operand->getParent() == CurBB) { - DEBUG(dbgs() << "No need to move.\n"); - // Try to move its operand, but do not visit an instuction twice. - if (VisitedSet.insert(Operand).second) - WorkStack.push_back(std::make_pair(Operand, Operand->op_begin())); - continue; - } - - // Now we need to move Operand to CurBB. - // Check if we already moved it. - ReplacedMapType::iterator At = ReplacedMap.find(Operand); - if (At != ReplacedMap.end()) { - DEBUG(dbgs() << "Moved.\n"); - Instruction *MovedOp = At->second; - It->set(MovedOp); - SE->forgetValue(MovedOp); - } else { - // Note that NewOp is not inserted in any BB now, we will insert it when - // it popped form the work stack, so it will be inserted in topological - // order. - Instruction *NewOp = Operand->clone(); - NewOp->setName(Operand->getName() + ".moved.to." + CurBB->getName()); - DEBUG(dbgs() << "Move to " << *NewOp << "\n"); - It->set(NewOp); - ReplacedMap.insert(std::make_pair(Operand, NewOp)); - SE->forgetValue(Operand); - - // Process its operands, but do not visit an instuction twice. - if (VisitedSet.insert(NewOp).second) - WorkStack.push_back(std::make_pair(NewOp, NewOp->op_begin())); - } - } - } - - SE->forgetValue(Inst); -} - -bool IndependentBlocks::createIndependentBlocks(BasicBlock *BB, - const Region *R) { - std::vector<Instruction *> WorkList; - for (Instruction &Inst : *BB) - if (!isSafeToMove(&Inst) && !canSynthesize(&Inst, LI, SE, R)) - WorkList.push_back(&Inst); - - ReplacedMapType ReplacedMap; - Instruction *InsertPos = BB->getFirstNonPHIOrDbg(); - - for (Instruction *Inst : WorkList) - if (!isa<PHINode>(Inst)) - moveOperandTree(Inst, R, ReplacedMap, InsertPos); - - // The BB was changed if we replaced any operand. - return !ReplacedMap.empty(); -} - -bool IndependentBlocks::createIndependentBlocks(const Region *R) { - bool Changed = false; - - for (BasicBlock *BB : R->blocks()) - Changed |= createIndependentBlocks(BB, R); - - return Changed; -} - -bool IndependentBlocks::eliminateDeadCode(const Region *R) { - std::vector<Instruction *> WorkList; - - // Find all trivially dead instructions. - for (BasicBlock *BB : R->blocks()) - for (Instruction &Inst : *BB) - if (!isIgnoredIntrinsic(&Inst) && isInstructionTriviallyDead(&Inst)) - WorkList.push_back(&Inst); - - if (WorkList.empty()) - return false; - - // Delete them so the cross BB scalar dependences come with them will - // also be eliminated. - while (!WorkList.empty()) { - RecursivelyDeleteTriviallyDeadInstructions(WorkList.back()); - WorkList.pop_back(); - } - - return true; -} - -bool IndependentBlocks::isEscapeUse(const Value *Use, const Region *R) { - // Non-instruction user will never escape. - if (!isa<Instruction>(Use)) - return false; - - return !R->contains(cast<Instruction>(Use)); -} - -bool IndependentBlocks::isIndependentBlock(const Region *R, - BasicBlock *BB) const { - for (Instruction &Inst : *BB) { - if (canSynthesize(&Inst, LI, SE, R)) - continue; - if (isIgnoredIntrinsic(&Inst)) - continue; - - // A value inside the Scop is referenced outside. - for (User *U : Inst.users()) { - if (isEscapeUse(U, R)) { - DEBUG(dbgs() << "Instruction not independent:\n"); - DEBUG(dbgs() << "Instruction used outside the Scop!\n"); - DEBUG(Inst.print(dbgs())); - DEBUG(dbgs() << "\n"); - return false; - } - } - } - - return true; -} - -bool IndependentBlocks::areAllBlocksIndependent(const Region *R) const { - for (BasicBlock *BB : R->blocks()) - if (!isIndependentBlock(R, BB)) - return false; - - return true; -} - -void IndependentBlocks::getAnalysisUsage(AnalysisUsage &AU) const { - // FIXME: If we set preserves cfg, the cfg only passes do not need to - // be "addPreserved"? - AU.addPreserved<AAResultsWrapperPass>(); - AU.addPreserved<BasicAAWrapperPass>(); - AU.addPreserved<DominatorTreeWrapperPass>(); - AU.addPreserved<DominanceFrontier>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - AU.addPreserved<PostDominatorTree>(); - AU.addRequired<RegionInfoPass>(); - AU.addPreserved<RegionInfoPass>(); - AU.addRequired<LoopInfoWrapperPass>(); - AU.addPreserved<LoopInfoWrapperPass>(); - AU.addRequired<ScalarEvolutionWrapperPass>(); - AU.addPreserved<ScalarEvolutionWrapperPass>(); - AU.addPreserved<SCEVAAWrapperPass>(); - AU.addRequired<ScopDetection>(); - AU.addPreserved<ScopDetection>(); -} - -bool IndependentBlocks::runOnFunction(llvm::Function &F) { - - bool Changed = false; - - RI = &getAnalysis<RegionInfoPass>().getRegionInfo(); - LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - SD = &getAnalysis<ScopDetection>(); - SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); - - AllocaBlock = &F.getEntryBlock(); - - DEBUG(dbgs() << "Run IndepBlock on " << F.getName() << '\n'); - - for (const Region *R : *SD) { - Changed |= createIndependentBlocks(R); - Changed |= eliminateDeadCode(R); - } - - verifyAnalysis(); - - return Changed; -} - -void IndependentBlocks::verifyAnalysis() const {} - -void IndependentBlocks::verifyScop(const Region *R) const { - assert(areAllBlocksIndependent(R) && "Cannot generate independent blocks"); -} - -char IndependentBlocks::ID = 0; -char &polly::IndependentBlocksID = IndependentBlocks::ID; - -Pass *polly::createIndependentBlocksPass() { return new IndependentBlocks(); } - -INITIALIZE_PASS_BEGIN(IndependentBlocks, "polly-independent", - "Polly - Create independent blocks", false, false); -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass); -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass); -INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass); -INITIALIZE_PASS_DEPENDENCY(ScopDetection); -INITIALIZE_PASS_END(IndependentBlocks, "polly-independent", - "Polly - Create independent blocks", false, false) diff --git a/polly/test/IndependentBlocks/inter_bb_scalar_dep.ll b/polly/test/IndependentBlocks/inter_bb_scalar_dep.ll deleted file mode 100644 index c1102a54543..00000000000 --- a/polly/test/IndependentBlocks/inter_bb_scalar_dep.ll +++ /dev/null @@ -1,48 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64, i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_plus_two = add i64 %init, 2 -; SCALARACCESS: %init_plus_two = add i64 %init, 2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} diff --git a/polly/test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll b/polly/test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll deleted file mode 100644 index b8960faa470..00000000000 --- a/polly/test/IndependentBlocks/intra_and_inter_bb_scalar_dep.ll +++ /dev/null @@ -1,56 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; for (i = 0; i < N; ++i) { -; init2 = *init_ptr; -; A[i] = init + init2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; SCALARACCESS-NOT: alloca - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - %init = load i64, i64* %init_ptr -; SCALARACCESS-NOT: store - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init_2 = load i64, i64* %init_ptr - %init_sum = add i64 %init, %init_2 - -; The SCEV of %init_sum is (%init + %init_2). It is referring to both an -; UnknownValue in the same and in a different basic block. We want only the -; reference to the different basic block to be replaced. - -; SCALARACCESS: %init_2 = load i64, i64* %init_ptr -; SCALARACCESS: %init_sum = add i64 %init, %init_2 - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_sum, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} diff --git a/polly/test/IndependentBlocks/intra_bb_scalar_dep.ll b/polly/test/IndependentBlocks/intra_bb_scalar_dep.ll deleted file mode 100644 index 36339abf22d..00000000000 --- a/polly/test/IndependentBlocks/intra_bb_scalar_dep.ll +++ /dev/null @@ -1,50 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s - -; void f(long A[], int N, int *init_ptr) { -; long i, j; -; -; for (i = 0; i < N; ++i) { -; for (i = 0; i < N; ++i) { -; init = *init_ptr; -; A[i] = init + 2; -; } -; } -; } - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" - -define void @f(i64* noalias %A, i64 %N, i64* noalias %init_ptr) nounwind { -entry: - -; CHECK: entry -; CHECK: br label %for.i - br label %for.i - -for.i: - %indvar.i = phi i64 [ 0, %entry ], [ %indvar.i.next, %for.i.end ] - %indvar.i.next = add nsw i64 %indvar.i, 1 - br label %entry.next - -entry.next: - br label %for.j - -for.j: - %indvar.j = phi i64 [ 0, %entry.next ], [ %indvar.j.next, %for.j ] - %init = load i64, i64* %init_ptr - %init_plus_two = add i64 %init, 2 -; The scalar evolution of %init_plus_two is (2 + %init). So we have a -; non-trivial scalar evolution referring to a value in the same basic block. -; We want to ensure that this scalar is not translated into a memory copy. - %scevgep = getelementptr i64, i64* %A, i64 %indvar.j - store i64 %init_plus_two, i64* %scevgep - %indvar.j.next = add nsw i64 %indvar.j, 1 - %exitcond.j = icmp eq i64 %indvar.j.next, %N - br i1 %exitcond.j, label %for.i.end, label %for.j - -for.i.end: - %exitcond.i = icmp eq i64 %indvar.i.next, %N - br i1 %exitcond.i, label %return, label %for.i - -return: - ret void -} diff --git a/polly/test/IndependentBlocks/phi_outside_scop.ll b/polly/test/IndependentBlocks/phi_outside_scop.ll deleted file mode 100644 index 2a7eb5eda0d..00000000000 --- a/polly/test/IndependentBlocks/phi_outside_scop.ll +++ /dev/null @@ -1,33 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALAR -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define void @phi_nodes_outside() { -entry: - br label %for.i.1 - -for.i.1: - %i.1 = phi i32 [ %i.1.next, %for.i.1 ], [ 0, %entry ] - %i.1.next = add nsw i32 %i.1, 1 - br i1 false, label %for.i.1 , label %for.i.2.preheader - -for.i.2.preheader: - br label %for.i.2 - -for.i.2: -; The value of %i.1.next is used outside of the scop in a PHI node. - %i.2 = phi i32 [ %i.2.next , %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] - %i.2.next = add nsw i32 %i.2, 1 - fence seq_cst - br i1 false, label %for.i.2, label %cleanup - -cleanup: - ret void -} - -; SCALAR-NOT: alloca - -; SCALAR: for.i.2.preheader: -; SCALAR-NOT: load - -; SCALAR: for.i.2: -; SCALAR: %i.2 = phi i32 [ %i.2.next, %for.i.2 ], [ %i.1.next, %for.i.2.preheader ] diff --git a/polly/test/IndependentBlocks/scalar_to_array.ll b/polly/test/IndependentBlocks/scalar_to_array.ll deleted file mode 100644 index 49f7842a3d7..00000000000 --- a/polly/test/IndependentBlocks/scalar_to_array.ll +++ /dev/null @@ -1,222 +0,0 @@ -; RUN: opt %loadPolly -basicaa -polly-independent -S < %s | FileCheck %s -check-prefix=SCALARACCESS -; RAUN: opt %loadPolly -basicaa -polly-independent < %s -S | FileCheck %s -check-prefix=SCALARACCESS - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" - -@A = common global [1024 x float] zeroinitializer, align 8 - -define i32 @empty() nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - - -; SCALARACCESS-LABEL: @array_access() -define i32 @array_access() nounwind { -entry: - fence seq_cst - br label %for.cond -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - store float %float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS: store float %float, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; SCALARACCESS-LABEL: @intra_scop_dep() -define i32 @intra_scop_dep() nounwind { -entry: - fence seq_cst - br label %for.cond - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body.a, label %return - -for.body.a: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.body.b - -; SCALARACCESS: for.body.a: -; SCALARACCESS: %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store -; SCALARACCESS: br label %for.body.b - -for.body.b: - %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %float = uitofp i64 %indvar to float - %sum = fadd float %scalar, %float - store float %sum, float* %arrayidx2 - br label %for.inc - -; SCALARACCESS: for.body.b: -; SCALARACCESS: %arrayidx2 = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar -; SCALARACCESS: %float = uitofp i64 %indvar to float -; SCALARACCESS-NOT: load -; SCALARACCESS: %sum = fadd float %scalar, %float -; SCALARACCESS: store float %sum, float* %arrayidx2 -; SCALARACCESS: br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; It is not possible to have a scop which accesses a scalar element that is -; a global variable. All global variables are pointers containing possibly -; a single element. - -; SCALARACCESS-LABEL: @use_after_scop() -define i32 @use_after_scop() nounwind { -entry: - fence seq_cst - br label %for.head - -; SCALARACCESS: entry: -; SCALARACCESS-NOT: alloca -; SCALARACCESS: fence - -for.head: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - br label %for.body - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - %scalar = load float, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: %scalar = load float, float* %arrayidx -; SCALARACCESS-NOT: store float %scalar - -for.inc: - %indvar.next = add i64 %indvar, 1 - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.head, label %for.after - -for.after: - fence seq_cst - %return_value = fptosi float %scalar to i32 - br label %return - -; SCALARACCESS: for.after: -; SCALARACCESS: fence seq_cst -; SCALARACCESS: %return_value = fptosi float %scalar to i32 - -return: - ret i32 %return_value -} - -; We currently do not transform scalar references, that have only read accesses -; in the scop. There are two reasons for this: -; -; o We don't introduce additional memory references which may yield to compile -; time overhead. -; o For integer values, such a translation may block the use of scalar -; evolution on those values. -; -; SCALARACCESS-LABEL: @before_scop() -define i32 @before_scop() nounwind { -entry: - br label %preheader - -preheader: - %scalar = fadd float 4.0, 5.0 - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %preheader ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -; SCALARACCESS: for.body: -; SCALARACCESS: store float %scalar, float* %arrayidx - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} - -; Currently not working -; SCALARACCESS-LABEL: @param_before_scop( -define i32 @param_before_scop(float %scalar) nounwind { -entry: - fence seq_cst - br label %for.cond - -for.cond: - %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ] - %exitcond = icmp ne i64 %indvar, 1024 - br i1 %exitcond, label %for.body, label %return - -for.body: - %arrayidx = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar - store float %scalar, float* %arrayidx - br label %for.inc - -for.inc: - %indvar.next = add i64 %indvar, 1 - br label %for.cond - -return: - fence seq_cst - ret i32 0 -} diff --git a/polly/test/Isl/CodeGen/srem-in-other-bb.ll b/polly/test/Isl/CodeGen/srem-in-other-bb.ll index e0e072f58bb..77ec86367ec 100644 --- a/polly/test/Isl/CodeGen/srem-in-other-bb.ll +++ b/polly/test/Isl/CodeGen/srem-in-other-bb.ll @@ -6,9 +6,13 @@ ; A[n % 42] += 1; ; } ; -; CHECK: polly.stmt.bb3: -; CHECK: %p_tmp.moved.to.bb3 = srem i64 %n, 42 -; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %p_tmp.moved.to.bb3 +; CHECK: polly.stmt.bb2: +; CHECK-NEXT: %p_tmp = srem i64 %n, 42 +; CHECK-NEXT: store i64 %p_tmp, i64* %tmp.s2a +; +; CHECK: polly.stmt.bb3: +; CHECK: %tmp.s2a.reload = load i64, i64* %tmp.s2a +; CHECK: %p_tmp3 = getelementptr inbounds float, float* %A, i64 %tmp.s2a.reload define void @pos(float* %A, i64 %n) { bb: diff --git a/polly/test/Isl/CodeGen/20110312-Fail-without-basicaa.ll b/polly/test/ScopInfo/20110312-Fail-without-basicaa.ll index 4228cc04c4b..838c491fb5a 100644 --- a/polly/test/Isl/CodeGen/20110312-Fail-without-basicaa.ll +++ b/polly/test/ScopInfo/20110312-Fail-without-basicaa.ll @@ -1,5 +1,5 @@ ; This should be run without alias analysis enabled. -;RUN: opt %loadPolly -polly-independent < %s +;RUN: opt %loadPolly -polly-scops < %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" define i32 @main() nounwind { diff --git a/polly/test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll b/polly/test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll deleted file mode 100644 index 574a6b61971..00000000000 --- a/polly/test/ScopInfo/independent-blocks-never-stop-on-big-scop.ll +++ /dev/null @@ -1,199 +0,0 @@ -; RUN: opt %loadPolly -polly-independent < %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" - -define i32 @main() nounwind uwtable readnone { - %arr = alloca [100 x i32], align 16 - br label %1 - -; <label>:1 ; preds = %1, %0 - %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %1 ] - %2 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv3 - %3 = trunc i64 %indvars.iv3 to i32 - store i32 %3, i32* %2, align 4, !tbaa !0 - %indvars.iv.next4 = add i64 %indvars.iv3, 1 - %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32 - %exitcond6 = icmp eq i32 %lftr.wideiv5, 100 - br i1 %exitcond6, label %.preheader, label %1 - -.preheader: ; preds = %.preheader, %1 - %indvars.iv = phi i64 [ %indvars.iv.next, %.preheader ], [ 0, %1 ] - %4 = getelementptr inbounds [100 x i32], [100 x i32]* %arr, i64 0, i64 %indvars.iv - %5 = load i32, i32* %4, align 4, !tbaa !0 - %6 = xor i32 %5, -1 - %7 = shl i32 %5, 15 - %8 = add nsw i32 %7, %6 - %9 = ashr i32 %8, 12 - %10 = xor i32 %9, %8 - %11 = mul i32 %10, 9 - %12 = ashr i32 %11, 4 - %13 = xor i32 %12, %11 - %14 = mul nsw i32 %13, 20571 - %15 = ashr i32 %14, 16 - %16 = xor i32 %15, %14 - %17 = xor i32 %16, -1 - %18 = shl i32 %16, 15 - %19 = add nsw i32 %18, %17 - %20 = ashr i32 %19, 12 - %21 = xor i32 %20, %19 - %22 = mul i32 %21, 5 - %23 = ashr i32 %22, 4 - %24 = xor i32 %23, %22 - %25 = mul nsw i32 %24, 20576 - %26 = ashr i32 %25, 16 - %27 = xor i32 %26, %25 - %28 = xor i32 %27, -1 - %29 = shl i32 %27, 15 - %30 = add nsw i32 %29, %28 - %31 = ashr i32 %30, 12 - %32 = xor i32 %31, %30 - %33 = mul i32 %32, 5 - %34 = ashr i32 %33, 4 - %35 = xor i32 %34, %33 - %36 = mul nsw i32 %35, 2057 - %37 = ashr i32 %36, 16 - %38 = xor i32 %37, %36 - %39 = xor i32 %38, -1 - %40 = shl i32 %38, 15 - %41 = add nsw i32 %40, %39 - %42 = ashr i32 %41, 12 - %43 = xor i32 %42, %41 - %44 = mul i32 %43, 5 - %45 = ashr i32 %44, 4 - %46 = xor i32 %45, %44 - %47 = mul nsw i32 %46, 20572 - %48 = ashr i32 %47, 16 - %49 = xor i32 %48, %47 - %50 = xor i32 %49, -1 - %51 = shl i32 %49, 15 - %52 = add nsw i32 %51, %50 - %53 = ashr i32 %52, 12 - %54 = xor i32 %53, %52 - %55 = mul i32 %54, 5 - %56 = ashr i32 %55, 4 - %57 = xor i32 %56, %55 - %58 = mul nsw i32 %57, 2051 - %59 = ashr i32 %58, 16 - %60 = xor i32 %59, %58 - %61 = xor i32 %60, -1 - %62 = shl i32 %60, 15 - %63 = add nsw i32 %62, %61 - %64 = ashr i32 %63, 12 - %65 = xor i32 %64, %63 - %66 = mul i32 %65, 5 - %67 = ashr i32 %66, 4 - %68 = xor i32 %67, %66 - %69 = mul nsw i32 %68, 2057 - %70 = ashr i32 %69, 16 - %71 = xor i32 %70, %69 - %72 = xor i32 %71, -1 - %73 = shl i32 %71, 15 - %74 = add nsw i32 %73, %72 - %75 = ashr i32 %74, 12 - %76 = xor i32 %75, %74 - %77 = mul i32 %76, 5 - %78 = ashr i32 %77, 4 - %79 = xor i32 %78, %77 - %80 = mul nsw i32 %79, 205 - %81 = ashr i32 %80, 17 - %82 = xor i32 %81, %80 - %83 = xor i32 %82, -1 - %84 = shl i32 %82, 15 - %85 = add nsw i32 %84, %83 - %86 = ashr i32 %85, 12 - %87 = xor i32 %86, %85 - %88 = mul i32 %87, 5 - %89 = ashr i32 %88, 4 - %90 = xor i32 %89, %88 - %91 = mul nsw i32 %90, 2057 - %92 = ashr i32 %91, 16 - %93 = xor i32 %92, %91 - %94 = xor i32 %93, -1 - %95 = shl i32 %93, 15 - %96 = add nsw i32 %95, %94 - %97 = ashr i32 %96, 12 - %98 = xor i32 %97, %96 - %99 = mul i32 %98, 5 - %100 = ashr i32 %99, 3 - %101 = xor i32 %100, %99 - %102 = mul nsw i32 %101, 20571 - %103 = ashr i32 %102, 16 - %104 = xor i32 %103, %102 - %105 = xor i32 %104, -1 - %106 = shl i32 %104, 15 - %107 = add nsw i32 %106, %105 - %108 = ashr i32 %107, 12 - %109 = xor i32 %108, %107 - %110 = mul i32 %109, 5 - %111 = ashr i32 %110, 4 - %112 = xor i32 %111, %110 - %113 = mul nsw i32 %112, 2057 - %114 = ashr i32 %113, 16 - %115 = xor i32 %114, %113 - %116 = xor i32 %115, -1 - %117 = shl i32 %115, 15 - %118 = add nsw i32 %117, %116 - %119 = ashr i32 %118, 12 - %120 = xor i32 %119, %118 - %121 = mul i32 %120, 5 - %122 = ashr i32 %121, 4 - %123 = xor i32 %122, %121 - %124 = mul nsw i32 %123, 20572 - %125 = ashr i32 %124, 16 - %126 = xor i32 %125, %124 - %127 = xor i32 %126, -1 - %128 = shl i32 %126, 15 - %129 = add nsw i32 %128, %127 - %130 = ashr i32 %129, 12 - %131 = xor i32 %130, %129 - %132 = mul i32 %131, 5 - %133 = ashr i32 %132, 4 - %134 = xor i32 %133, %132 - %135 = mul nsw i32 %134, 2057 - %136 = ashr i32 %135, 16 - %137 = xor i32 %136, %135 - %138 = xor i32 %137, -1 - %139 = shl i32 %137, 15 - %140 = add nsw i32 %139, %138 - %141 = ashr i32 %140, 12 - %142 = xor i32 %141, %140 - %143 = mul i32 %142, 5 - %144 = ashr i32 %143, 4 - %145 = xor i32 %144, %143 - %146 = mul nsw i32 %145, 2057 - %147 = ashr i32 %146, 16 - %148 = xor i32 %147, %146 - %149 = xor i32 %148, -1 - %150 = shl i32 %148, 15 - %151 = add nsw i32 %150, %149 - %152 = ashr i32 %151, 12 - %153 = xor i32 %152, %151 - %154 = mul i32 %153, 5 - %155 = ashr i32 %154, 4 - %156 = xor i32 %155, %154 - %157 = mul nsw i32 %156, 2057 - %158 = ashr i32 %157, 16 - %159 = xor i32 %158, %157 - %160 = xor i32 %159, -1 - %161 = shl i32 %159, 15 - %162 = add nsw i32 %161, %160 - %163 = ashr i32 %162, 12 - %164 = xor i32 %163, %162 - %165 = mul i32 %164, 5 - %166 = ashr i32 %165, 4 - %167 = xor i32 %166, %165 - %168 = mul nsw i32 %167, 2057 - %169 = ashr i32 %168, 16 - %170 = xor i32 %169, %168 - store i32 %170, i32* %4, align 4, !tbaa !0 - %indvars.iv.next = add i64 %indvars.iv, 1 - %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, 100 - br i1 %exitcond, label %171, label %.preheader - -; <label>:171 ; preds = %.preheader - ret i32 0 -} - -!0 = !{!"int", !1} -!1 = !{!"omnipotent char", !2} -!2 = !{!"Simple C/C++ TBAA", null} diff --git a/polly/test/ScopInfo/scalar_dependence_cond_br.ll b/polly/test/ScopInfo/scalar_dependence_cond_br.ll index fafd74dc559..6ae5ed21852 100644 --- a/polly/test/ScopInfo/scalar_dependence_cond_br.ll +++ b/polly/test/ScopInfo/scalar_dependence_cond_br.ll @@ -6,11 +6,13 @@ ; A[i]++; ; } ; +; FIXME: This test is a negative test until we have an independent blocks alternative. +; ; We should move operands as close to their use as possible, hence in this case ; there should not be any scalar dependence anymore after %cmp1 is moved to ; %for.body (%c and %indvar.iv are synthesis able). ; -; CHECK-NOT: [Scalar: 1] +; CHECK: [Scalar: 1] ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/IndependentBlocks/scev-invalidated.ll b/polly/test/ScopInfo/scev-invalidated.ll index 1a49972eacf..1ed7798ba67 100644 --- a/polly/test/IndependentBlocks/scev-invalidated.ll +++ b/polly/test/ScopInfo/scev-invalidated.ll @@ -1,4 +1,7 @@ -; RUN: opt %loadPolly -polly-independent < %s +; RUN: opt %loadPolly -polly-scops < %s +; +; CHECK: Region: %if.then6---%return +; target datalayout ="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" define void @arc_either() { |