From 577935962456e90580b24082473416f110f7705d Mon Sep 17 00:00:00 2001 From: Tobias Grosser Date: Mon, 25 Jul 2016 16:31:21 +0000 Subject: GPGPU: Load GPU kernels We embed the PTX code into the host IR as a global variable and compile it at run-time into a GPU kernel. llvm-svn: 276645 --- polly/lib/CodeGen/PPCGCodeGeneration.cpp | 63 ++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) (limited to 'polly/lib/CodeGen/PPCGCodeGeneration.cpp') diff --git a/polly/lib/CodeGen/PPCGCodeGeneration.cpp b/polly/lib/CodeGen/PPCGCodeGeneration.cpp index 3b5a5d3aabe..a5d55ea7ac5 100644 --- a/polly/lib/CodeGen/PPCGCodeGeneration.cpp +++ b/polly/lib/CodeGen/PPCGCodeGeneration.cpp @@ -281,7 +281,9 @@ private: /// /// Free the LLVM-IR module corresponding to the kernel and -- if requested -- /// dump its IR to stderr. - void finalizeKernelFunction(); + /// + /// @returns The Assembly string of the kernel. + std::string finalizeKernelFunction(); /// Create code that allocates memory to store arrays on device. void allocateDeviceArrays(); @@ -324,6 +326,19 @@ private: /// @param HostPtr A host pointer specifying the location to copy to. void createCallCopyFromDeviceToHost(Value *DevicePtr, Value *HostPtr, Value *Size); + + /// Create a call to get a kernel from an assembly string. + /// + /// @param Buffer The string describing the kernel. + /// @param Entry The name of the kernel function to call. + /// + /// @returns A pointer to a kernel object + Value *createCallGetKernel(Value *Buffer, Value *Entry); + + /// Create a call to free a GPU kernel. + /// + /// @param GPUKernel The kernel to free. 
+ void createCallFreeKernel(Value *GPUKernel); }; void GPUNodeBuilder::initializeAfterRTH() { @@ -360,6 +375,41 @@ void GPUNodeBuilder::freeDeviceArrays() { createCallFreeDeviceMemory(Array.second); } +Value *GPUNodeBuilder::createCallGetKernel(Value *Buffer, Value *Entry) { + const char *Name = "polly_getKernel"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. + if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + Args.push_back(Builder.getInt8PtrTy()); + FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + return Builder.CreateCall(F, {Buffer, Entry}); +} + +void GPUNodeBuilder::createCallFreeKernel(Value *GPUKernel) { + const char *Name = "polly_freeKernel"; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Function *F = M->getFunction(Name); + + // If F is not available, declare it. 
+ if (!F) { + GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; + std::vector<Type *> Args; + Args.push_back(Builder.getInt8PtrTy()); + FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false); + F = Function::Create(Ty, Linkage, Name, M); + } + + Builder.CreateCall(F, {GPUKernel}); +} + void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) { const char *Name = "polly_freeDeviceMemory"; Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -755,7 +805,12 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) { S.invalidateScopArrayInfo(BasePtr, ScopArrayInfo::MK_Array); LocalArrays.clear(); - finalizeKernelFunction(); + std::string ASMString = finalizeKernelFunction(); + std::string Name = "kernel_" + std::to_string(Kernel->id); + Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name); + Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name"); + Value *GPUKernel = createCallGetKernel(KernelString, NameString); + createCallFreeKernel(GPUKernel); } /// Compute the DataLayout string for the NVPTX backend. @@ -943,7 +998,7 @@ std::string GPUNodeBuilder::createKernelASM() { return ASMStream.str(); } -void GPUNodeBuilder::finalizeKernelFunction() { +std::string GPUNodeBuilder::finalizeKernelFunction() { // Verify module. llvm::legacy::PassManager Passes; Passes.add(createVerifierPass()); @@ -967,6 +1022,8 @@ void GPUNodeBuilder::finalizeKernelFunction() { GPUModule.release(); KernelIDs.clear(); + + return Assembly; } namespace { -- cgit v1.2.3