diff options
| author | Tobias Grosser <tobias@grosser.es> | 2016-07-22 07:11:12 +0000 |
|---|---|---|
| committer | Tobias Grosser <tobias@grosser.es> | 2016-07-22 07:11:12 +0000 |
| commit | 74dc3cb431b438348c0c77b2c70029fc966f60b9 (patch) | |
| tree | 641f5896d5ee36a30a40e05e2c7c57d53fb3292b /polly/lib/CodeGen/PPCGCodeGeneration.cpp | |
| parent | d2ae303eb0d46e34b2d5dd5ffdd89b483df938a9 (diff) | |
| download | bcm5719-llvm-74dc3cb431b438348c0c77b2c70029fc966f60b9.tar.gz bcm5719-llvm-74dc3cb431b438348c0c77b2c70029fc966f60b9.zip | |
GPGPU: Generate PTX assembly code for the kernel modules
Run the NVPTX backend over the GPUModule IR and write the resulting assembly
code to a string.
For this to work correctly, it is important to invalidate analysis results that
still reference the IR in the kernel module. Hence, this change clears all
references to dominators, loop info, and scalar evolution.
Finally, the NVPTX backend has trouble generating code for various special
floating point types (not surprising), but also for uncommon integer types. This
commit does not resolve these issues, but pulls the problematic test cases out
into separate files so that they can be XFAILed individually and resolved one by
one in future (not immediate) changes.
llvm-svn: 276396
Diffstat (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp')
| -rw-r--r-- | polly/lib/CodeGen/PPCGCodeGeneration.cpp | 123 |
1 files changed, 123 insertions, 0 deletions
diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 8b4d2220297..74dd31d91fc 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -19,11 +19,18 @@ #include "polly/Options.h" #include "polly/ScopInfo.h" #include "polly/Support/SCEVValidator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Target/TargetMachine.h" #include "isl/union_map.h" @@ -57,6 +64,21 @@ static cl::opt<bool> DumpKernelIR("polly-acc-dump-kernel-ir", cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt<bool> DumpKernelASM("polly-acc-dump-kernel-asm", + cl::desc("Dump the kernel assembly code"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::cat(PollyCategory)); + +static cl::opt<bool> FastMath("polly-acc-fastmath", + cl::desc("Allow unsafe math optimizations"), + cl::Hidden, cl::init(false), cl::ZeroOrMore, + cl::cat(PollyCategory)); + +static cl::opt<std::string> + CudaVersion("polly-acc-cuda-version", + cl::desc("The CUDA version to compile for"), cl::Hidden, + cl::init("sm_30"), cl::ZeroOrMore, cl::cat(PollyCategory)); + /// Create the ast expressions for a ScopStmt. /// /// This function is a callback for to generate the ast expressions for each @@ -112,6 +134,12 @@ public: } private: + /// A vector of array base pointers for which a new ScopArrayInfo was created. + /// + /// This vector is used to delete the ScopArrayInfo when it is not needed any + /// more. 
+ std::vector<Value *> LocalArrays; + /// A module containing GPU code. /// /// This pointer is only set in case we are currently generating GPU code. @@ -201,6 +229,26 @@ private: /// Create an in-kernel synchronization call. void createKernelSync(); + /// Create a PTX assembly string for the current GPU kernel. + /// + /// @returns A string containing the corresponding PTX assembly code. + std::string createKernelASM(); + + /// Remove references from the dominator tree to the kernel function @p F. + /// + /// @param F The function to remove references to. + void clearDominators(Function *F); + + /// Remove references from scalar evolution to the kernel function @p F. + /// + /// @param F The function to remove references to. + void clearScalarEvolution(Function *F); + + /// Remove references from loop info to the kernel function @p F. + /// + /// @param F The function to remove references to. + void clearLoops(Function *F); + /// Finalize the generation of the kernel function. /// /// Free the LLVM-IR module corresponding to the kernel and -- if requested -- @@ -360,6 +408,33 @@ SetVector<Value *> GPUNodeBuilder::getReferencesInKernel(ppcg_kernel *Kernel) { return SubtreeValues; } +void GPUNodeBuilder::clearDominators(Function *F) { + DomTreeNode *N = DT.getNode(&F->getEntryBlock()); + std::vector<BasicBlock *> Nodes; + for (po_iterator<DomTreeNode *> I = po_begin(N), E = po_end(N); I != E; ++I) + Nodes.push_back(I->getBlock()); + + for (BasicBlock *BB : Nodes) + DT.eraseNode(BB); +} + +void GPUNodeBuilder::clearScalarEvolution(Function *F) { + for (BasicBlock &BB : *F) { + Loop *L = LI.getLoopFor(&BB); + if (L) + SE.forgetLoop(L); + } +} + +void GPUNodeBuilder::clearLoops(Function *F) { + for (BasicBlock &BB : *F) { + Loop *L = LI.getLoopFor(&BB); + if (L) + SE.forgetLoop(L); + LI.removeBlock(&BB); + } +} + void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { isl_id *Id = isl_ast_node_get_annotation(KernelStmt); ppcg_kernel *Kernel = 
(ppcg_kernel *)isl_id_get_user(Id); @@ -392,6 +467,11 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { create(isl_ast_node_copy(Kernel->tree)); + Function *F = Builder.GetInsertBlock()->getParent(); + clearDominators(F); + clearScalarEvolution(F); + clearLoops(F); + Builder.SetInsertPoint(&HostInsertPoint); IDToValue = HostIDs; @@ -400,6 +480,10 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { PHIOpMap.clear(); EscapeMap.clear(); IDToSAI.clear(); + Annotator.resetAlternativeAliasBases(); + for (auto &BasePtr : LocalArrays) + S.invalidateScopArrayInfo(BasePtr, ScopArrayInfo::MK_Array); + LocalArrays.clear(); finalizeKernelFunction(); } @@ -471,6 +555,7 @@ GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel, } const ScopArrayInfo *SAIRep = S.getOrCreateScopArrayInfo(Val, EleTy, Sizes, ScopArrayInfo::MK_Array); + LocalArrays.push_back(Val); isl_ast_build_free(Build); isl_id_free(Id); @@ -555,11 +640,49 @@ void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel, insertKernelIntrinsics(Kernel); } +std::string GPUNodeBuilder::createKernelASM() { + llvm::Triple GPUTriple(Triple::normalize("nvptx64-nvidia-cuda")); + std::string ErrMsg; + auto GPUTarget = TargetRegistry::lookupTarget(GPUTriple.getTriple(), ErrMsg); + + if (!GPUTarget) { + errs() << ErrMsg << "\n"; + return ""; + } + + TargetOptions Options; + Options.UnsafeFPMath = FastMath; + std::unique_ptr<TargetMachine> TargetM( + GPUTarget->createTargetMachine(GPUTriple.getTriple(), CudaVersion, "", + Options, Optional<Reloc::Model>())); + + SmallString<0> ASMString; + raw_svector_ostream ASMStream(ASMString); + llvm::legacy::PassManager PM; + + PM.add(createTargetTransformInfoWrapperPass(TargetM->getTargetIRAnalysis())); + + if (TargetM->addPassesToEmitFile( + PM, ASMStream, TargetMachine::CGFT_AssemblyFile, true /* verify */)) { + errs() << "The target does not support generation of this file type!\n"; + return ""; + } + + PM.run(*GPUModule); + + 
return ASMStream.str(); +} + void GPUNodeBuilder::finalizeKernelFunction() { if (DumpKernelIR) outs() << *GPUModule << "\n"; + std::string Assembly = createKernelASM(); + + if (DumpKernelASM) + outs() << Assembly << "\n"; + GPUModule.release(); KernelIDs.clear(); } |

