diff options
| author | Alex Zinenko <zinenko@google.com> | 2019-08-13 01:38:54 -0700 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-08-13 01:39:21 -0700 |
| commit | 88de8b2a2bb09e4e3ebb829949e0256c3e86c609 (patch) | |
| tree | a2d2169e44ed00aeafdb407a6ac7ac4dc917ebcc /mlir/lib/Conversion/GPUToCUDA | |
| parent | 926fb685deadfed2042163145ac52311914bf5c2 (diff) | |
| download | bcm5719-llvm-88de8b2a2bb09e4e3ebb829949e0256c3e86c609.tar.gz bcm5719-llvm-88de8b2a2bb09e4e3ebb829949e0256c3e86c609.zip | |
GenerateCubinAccessors: use LLVM dialect constants
The GenerateCubinAccessors pass was generating functions that fill
dynamically-allocated memory with the binary constant of a CUBIN attached as a
string attribute to the GPU kernel. This approach was taken to circumvent the
missing support for global constants in the LLVM dialect (and MLIR in general).
Global constants were recently added to the LLVM dialect. Change the
GenerateCubinAccessors pass to emit a global constant array of characters and a
function that returns a pointer to the first character in the array.
PiperOrigin-RevId: 263092052
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
| -rw-r--r-- | mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp | 118 |
1 files changed, 48 insertions, 70 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp index fa481632e29..332a1324865 100644 --- a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp @@ -40,16 +40,10 @@ namespace { constexpr const char *kCubinAnnotation = "nvvm.cubin"; constexpr const char *kCubinGetterAnnotation = "nvvm.cubingetter"; constexpr const char *kCubinGetterSuffix = "_cubin"; -constexpr const char *kMallocHelperName = "malloc"; - -/// A pass generating getter functions for all cubin blobs annotated on -/// functions via the nvvm.cubin attribute. -/// -/// The functions allocate memory using the system malloc call with signature -/// void *malloc(size_t size). This function has to be provided by the actual -/// runner that executes the generated code. -/// -/// This is a stop-gap measure until MLIR supports global constants. +constexpr const char *kCubinStorageSuffix = "_cubin_cst"; + +/// A pass generating global strings and getter functions for all cubin blobs +/// annotated on functions via the nvvm.cubin attribute. class GpuGenerateCubinAccessorsPass : public ModulePass<GpuGenerateCubinAccessorsPass> { private: @@ -59,79 +53,63 @@ private: return LLVM::LLVMType::getIntNTy(llvmDialect, bits); } - FuncOp getMallocHelper(Location loc, Builder &builder) { - FuncOp result = getModule().lookupSymbol<FuncOp>(kMallocHelperName); - if (!result) { - result = FuncOp::create( - loc, kMallocHelperName, - builder.getFunctionType(ArrayRef<Type>{getIndexType()}, - LLVM::LLVMType::getInt8PtrTy(llvmDialect))); - getModule().push_back(result); - } - return result; - } - - // Generates a function that returns a char array at runtime that contains the - // data from blob. As there are currently no global constants, this uses a - // sequence of store operations. - // TODO(herhut): Use global constants instead. 
- FuncOp generateCubinAccessor(Builder &builder, FuncOp &orig, - StringAttr blob) { + // Inserts a global constant string containing `blob` into the parent module + // of `orig` and generates the function that returns the address of the first + // character of this string. + // TODO(herhut): consider fusing this pass with launch-func-to-cuda. + void generate(FuncOp orig, StringAttr blob) { Location loc = orig.getLoc(); SmallString<128> nameBuffer(orig.getName()); + auto module = orig.getParentOfType<ModuleOp>(); + assert(module && "function must belong to a module"); + + // Create a global at the top of the module. + OpBuilder moduleBuilder(module.getBody(), module.getBody()->begin()); + auto type = LLVM::LLVMType::getArrayTy( + LLVM::LLVMType::getInt8Ty(llvmDialect), blob.getValue().size()); + nameBuffer.append(kCubinStorageSuffix); + auto cubinGlobalString = moduleBuilder.create<LLVM::GlobalOp>( + loc, type, /*isConstant=*/true, StringRef(nameBuffer), blob); + + // Insert the getter function just after the original function. + moduleBuilder.setInsertionPoint(orig.getOperation()->getNextNode()); + auto getterType = moduleBuilder.getFunctionType( + llvm::None, LLVM::LLVMType::getInt8PtrTy(llvmDialect)); + // Drop the storage suffix before appending the getter suffix. + nameBuffer.resize(orig.getName().size()); nameBuffer.append(kCubinGetterSuffix); - // Generate a function that returns void*. - FuncOp result = FuncOp::create( - loc, mlir::Identifier::get(nameBuffer, &getContext()), - builder.getFunctionType(ArrayRef<Type>{}, - LLVM::LLVMType::getInt8PtrTy(llvmDialect))); - // Insert a body block that just returns the constant. 
- OpBuilder ob(result.getBody()); - ob.createBlock(&result.getBody()); - auto sizeConstant = ob.create<LLVM::ConstantOp>( - loc, getIndexType(), - builder.getIntegerAttr(builder.getIndexType(), blob.getValue().size())); - auto memory = - ob.create<LLVM::CallOp>( - loc, ArrayRef<Type>{LLVM::LLVMType::getInt8PtrTy(llvmDialect)}, - builder.getSymbolRefAttr(getMallocHelper(loc, builder)), - ArrayRef<Value *>{sizeConstant}) - .getResult(0); - for (auto byte : llvm::enumerate(blob.getValue().bytes())) { - auto index = ob.create<LLVM::ConstantOp>( - loc, LLVM::LLVMType::getInt32Ty(llvmDialect), - builder.getI32IntegerAttr(byte.index())); - auto gep = - ob.create<LLVM::GEPOp>(loc, LLVM::LLVMType::getInt8PtrTy(llvmDialect), - memory, ArrayRef<Value *>{index}); - auto value = ob.create<LLVM::ConstantOp>( - loc, LLVM::LLVMType::getInt8Ty(llvmDialect), - builder.getIntegerAttr(builder.getIntegerType(8), byte.value())); - ob.create<LLVM::StoreOp>(loc, value, gep); - } - ob.create<LLVM::ReturnOp>(loc, ArrayRef<Value *>{memory}); + auto result = moduleBuilder.create<FuncOp>( + loc, StringRef(nameBuffer), getterType, ArrayRef<NamedAttribute>()); + Block *entryBlock = result.addEntryBlock(); + + // Obtain the address of the first character of the global string containing + // the cubin and return from the getter (addressof will return [? x i8]*). + OpBuilder builder(entryBlock); + Value *cubinGlobalStringPtr = + builder.create<LLVM::AddressOfOp>(loc, cubinGlobalString); + Value *cst0 = builder.create<LLVM::ConstantOp>( + loc, getIndexType(), builder.getIntegerAttr(builder.getIndexType(), 0)); + Value *startPtr = builder.create<LLVM::GEPOp>( + loc, LLVM::LLVMType::getInt8PtrTy(llvmDialect), cubinGlobalStringPtr, + ArrayRef<Value *>({cst0, cst0})); + builder.create<LLVM::ReturnOp>(loc, startPtr); + // Store the name of the getter on the function for easier lookup. 
orig.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result)); - return result; } public: - // Run the dialect converter on the module. + // Perform the conversion on the module. This may insert globals, so it + // cannot be done on multiple functions in parallel. void runOnModule() override { llvmDialect = getModule().getContext()->getRegisteredDialect<LLVM::LLVMDialect>(); - auto module = getModule(); - Builder builder(&getContext()); - - auto functions = module.getOps<FuncOp>(); - for (auto it = functions.begin(); it != functions.end();) { - // Move iterator to after the current function so that potential insertion - // of the accessor is after the kernel with cubin iself. - FuncOp orig = *it++; - StringAttr cubinBlob = orig.getAttrOfType<StringAttr>(kCubinAnnotation); + + for (auto func : getModule().getOps<FuncOp>()) { + StringAttr cubinBlob = func.getAttrOfType<StringAttr>(kCubinAnnotation); if (!cubinBlob) continue; - module.insert(it, generateCubinAccessor(builder, orig, cubinBlob)); + generate(func, cubinBlob); } } |

