//===------ LoopGenerators.cpp - IR helper to create loops ---------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file contains functions to create scalar and parallel loops as LLVM-IR. // //===----------------------------------------------------------------------===// #include "polly/CodeGen/LoopGenerators.h" #include "polly/ScopDetection.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; using namespace polly; static cl::opt PollyNumThreads("polly-num-threads", cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, cl::init(0)); // We generate a loop of either of the following structures: // // BeforeBB BeforeBB // | | // v v // GuardBB PreHeaderBB // / | | _____ // __ PreHeaderBB | v \/ | // / \ / | HeaderBB latch // latch HeaderBB | |\ | // \ / \ / | \------/ // < \ / | // \ / v // ExitBB ExitBB // // depending on whether or not we know that it is executed at least once. If // not, GuardBB checks if the loop is executed at least once. If this is the // case we branch to PreHeaderBB and subsequently to the HeaderBB, which // contains the loop iv 'polly.indvar', the incremented loop iv // 'polly.indvar_next' as well as the condition to check if we execute another // iteration of the loop. After the loop has finished, we branch to ExitBB. // We expect the type of UB, LB, UB+Stride to be large enough for values that // UB may take throughout the execution of the loop, including the computation // of indvar + Stride before the final abort. Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, PollyIRBuilder &Builder, LoopInfo &LI, DominatorTree &DT, BasicBlock *&ExitBB, ICmpInst::Predicate Predicate, ScopAnnotator *Annotator, bool Parallel, bool UseGuard, bool LoopVectDisabled) { Function *F = Builder.GetInsertBlock()->getParent(); LLVMContext &Context = F->getContext(); assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); IntegerType *LoopIVType = dyn_cast(UB->getType()); assert(LoopIVType && "UB is not integer?"); BasicBlock *BeforeBB = Builder.GetInsertBlock(); BasicBlock *GuardBB = UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); BasicBlock *PreHeaderBB = BasicBlock::Create(Context, "polly.loop_preheader", F); // Update LoopInfo Loop *OuterLoop = LI.getLoopFor(BeforeBB); Loop *NewLoop = LI.AllocateLoop(); if (OuterLoop) OuterLoop->addChildLoop(NewLoop); else LI.addTopLevelLoop(NewLoop); if (OuterLoop) { if (GuardBB) OuterLoop->addBasicBlockToLoop(GuardBB, LI); OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); } NewLoop->addBasicBlockToLoop(HeaderBB, LI); // Notify the annotator (if present) that we have a new loop, but only // after the header block is set. if (Annotator) Annotator->pushLoop(NewLoop, Parallel); // ExitBB ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); ExitBB->setName("polly.loop_exit"); // BeforeBB if (GuardBB) { BeforeBB->getTerminator()->setSuccessor(0, GuardBB); DT.addNewBlock(GuardBB, BeforeBB); // GuardBB Builder.SetInsertPoint(GuardBB); Value *LoopGuard; LoopGuard = Builder.CreateICmp(Predicate, LB, UB); LoopGuard->setName("polly.loop_guard"); Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); DT.addNewBlock(PreHeaderBB, GuardBB); } else { BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); DT.addNewBlock(PreHeaderBB, BeforeBB); } // PreHeaderBB Builder.SetInsertPoint(PreHeaderBB); Builder.CreateBr(HeaderBB); // HeaderBB DT.addNewBlock(HeaderBB, PreHeaderBB); Builder.SetInsertPoint(HeaderBB); PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); IV->addIncoming(LB, PreHeaderBB); Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); Value *LoopCondition = Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); // Create the loop latch and annotate it as such. BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); if (Annotator) Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); IV->addIncoming(IncrementedIV, HeaderBB); if (GuardBB) DT.changeImmediateDominator(ExitBB, GuardBB); else DT.changeImmediateDominator(ExitBB, HeaderBB); // The loop body should be added here. Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); return IV; } Value *ParallelLoopGenerator::createParallelLoop( Value *LB, Value *UB, Value *Stride, SetVector &UsedValues, ValueMapT &Map, BasicBlock::iterator *LoopBody) { Function *SubFn; AllocaInst *Struct = storeValuesIntoStruct(UsedValues); BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); Value *IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); *LoopBody = Builder.GetInsertPoint(); Builder.SetInsertPoint(&*BeforeLoop); Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "polly.par.userContext"); // Add one as the upper bound provided by OpenMP is a < comparison // whereas the codegenForSequential function creates a <= comparison. UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); // Tell the runtime we start a parallel loop createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); Builder.CreateCall(SubFn, SubFnParam); createCallJoinThreads(); return IV; } void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB, Value *UB, Value *Stride) { const std::string Name = "GOMP_parallel_loop_runtime_start"; Function *F = M->getFunction(Name); // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; Type *Params[] = {PointerType::getUnqual(FunctionType::get( Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongType, LongType, LongType}; FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); F = Function::Create(Ty, Linkage, Name, M); } Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; Builder.CreateCall(F, Args); } Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, Value *UBPtr) { const std::string Name = "GOMP_loop_runtime_next"; Function *F = M->getFunction(Name); // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); F = Function::Create(Ty, Linkage, Name, M); } Value *Args[] = {LBPtr, UBPtr}; Value *Return = Builder.CreateCall(F, Args); Return = Builder.CreateICmpNE( Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); return Return; } void ParallelLoopGenerator::createCallJoinThreads() { const std::string Name = "GOMP_parallel_end"; Function *F = M->getFunction(Name); // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); F = Function::Create(Ty, Linkage, Name, M); } Builder.CreateCall(F, {}); } void ParallelLoopGenerator::createCallCleanupThread() { const std::string Name = "GOMP_loop_end_nowait"; Function *F = M->getFunction(Name); // If F is not available, declare it. if (!F) { GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); F = Function::Create(Ty, Linkage, Name, M); } Builder.CreateCall(F, {}); } Function *ParallelLoopGenerator::createSubFnDefinition() { Function *F = Builder.GetInsertBlock()->getParent(); std::vector Arguments(1, Builder.getInt8PtrTy()); FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); Function *SubFn = Function::Create(FT, Function::InternalLinkage, F->getName() + "_polly_subfn", M); // Certain backends (e.g., NVPTX) do not support '.'s in function names. // Hence, we ensure that all '.'s are replaced by '_'s. std::string FunctionName = SubFn->getName(); std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); SubFn->setName(FunctionName); // Do not run any polly pass on the new function. SubFn->addFnAttr(PollySkipFnAttr); Function::arg_iterator AI = SubFn->arg_begin(); AI->setName("polly.par.userContext"); return SubFn; } AllocaInst * ParallelLoopGenerator::storeValuesIntoStruct(SetVector &Values) { SmallVector Members; for (Value *V : Values) Members.push_back(V->getType()); const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); // We do not want to allocate the alloca inside any loop, thus we allocate it // in the entry block of the function and use annotations to denote the actual // live span (similar to clang). BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); Instruction *IP = &*EntryBB.getFirstInsertionPt(); StructType *Ty = StructType::get(Builder.getContext(), Members); AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, "polly.par.userContext", IP); for (unsigned i = 0; i < Values.size(); i++) { Value *Address = Builder.CreateStructGEP(Ty, Struct, i); Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); Builder.CreateStore(Values[i], Address); } return Struct; } void ParallelLoopGenerator::extractValuesFromStruct( SetVector OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { for (unsigned i = 0; i < OldValues.size(); i++) { Value *Address = Builder.CreateStructGEP(Ty, Struct, i); Value *NewValue = Builder.CreateLoad(Address); NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); Map[OldValues[i]] = NewValue; } } Value *ParallelLoopGenerator::createSubFn(Value *Stride, AllocaInst *StructData, SetVector Data, ValueMapT &Map, Function **SubFnPtr) { BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; Function *SubFn = createSubFnDefinition(); LLVMContext &Context = SubFn->getContext(); // Store the previous basic block. PrevBB = Builder.GetInsertBlock(); // Create basic blocks. HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); DT.addNewBlock(HeaderBB, PrevBB); DT.addNewBlock(ExitBB, HeaderBB); DT.addNewBlock(CheckNextBB, HeaderBB); DT.addNewBlock(PreHeaderBB, HeaderBB); // Fill up basic block HeaderBB. Builder.SetInsertPoint(HeaderBB); LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); UserContext = Builder.CreateBitCast( &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, Map); Builder.CreateBr(CheckNextBB); // Add code to check if another set of iterations will be executed. Builder.SetInsertPoint(CheckNextBB); Ret1 = createCallGetWorkItem(LBPtr, UBPtr); HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock"); Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); // Add code to load the iv bounds for this set of iterations. Builder.SetInsertPoint(PreHeaderBB); LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); // Subtract one as the upper bound provided by OpenMP is a < comparison // whereas the codegenForSequential function creates a <= comparison. UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), "polly.par.UBAdjusted"); Builder.CreateBr(CheckNextBB); Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false); BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); // Add code to terminate this subfunction. Builder.SetInsertPoint(ExitBB); createCallCleanupThread(); Builder.CreateRetVoid(); Builder.SetInsertPoint(&*LoopBody); *SubFnPtr = SubFn; return IV; }