summary | refs | log | tree | commit | diff | stats
path: root/mlir/lib/Conversion/GPUToCUDA
diff options
context:
space:
mode:
author: Alex Zinenko <zinenko@google.com> 2019-08-13 01:38:54 -0700
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-08-13 01:39:21 -0700
commit: 88de8b2a2bb09e4e3ebb829949e0256c3e86c609 (patch)
tree: a2d2169e44ed00aeafdb407a6ac7ac4dc917ebcc /mlir/lib/Conversion/GPUToCUDA
parent: 926fb685deadfed2042163145ac52311914bf5c2 (diff)
download: bcm5719-llvm-88de8b2a2bb09e4e3ebb829949e0256c3e86c609.tar.gz
          bcm5719-llvm-88de8b2a2bb09e4e3ebb829949e0256c3e86c609.zip
GenerateCubinAccessors: use LLVM dialect constants
The GenerateCubinAccessors pass was generating functions that fill dynamically-allocated memory with the binary constant of a CUBIN attached as a string attribute to the GPU kernel. This approach was taken to circumvent the missing support for global constants in the LLVM dialect (and MLIR in general). Global constants were recently added to the LLVM dialect. Change the GenerateCubinAccessors pass to emit a global constant array of characters and a function that returns a pointer to the first character in the array.

PiperOrigin-RevId: 263092052
Diffstat (limited to 'mlir/lib/Conversion/GPUToCUDA')
-rw-r--r--  mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp  118
1 files changed, 48 insertions, 70 deletions
diff --git a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
index fa481632e29..332a1324865 100644
--- a/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/GenerateCubinAccessors.cpp
@@ -40,16 +40,10 @@ namespace {
constexpr const char *kCubinAnnotation = "nvvm.cubin";
constexpr const char *kCubinGetterAnnotation = "nvvm.cubingetter";
constexpr const char *kCubinGetterSuffix = "_cubin";
-constexpr const char *kMallocHelperName = "malloc";
-
-/// A pass generating getter functions for all cubin blobs annotated on
-/// functions via the nvvm.cubin attribute.
-///
-/// The functions allocate memory using the system malloc call with signature
-/// void *malloc(size_t size). This function has to be provided by the actual
-/// runner that executes the generated code.
-///
-/// This is a stop-gap measure until MLIR supports global constants.
+constexpr const char *kCubinStorageSuffix = "_cubin_cst";
+
+/// A pass generating global strings and getter functions for all cubin blobs
+/// annotated on functions via the nvvm.cubin attribute.
class GpuGenerateCubinAccessorsPass
: public ModulePass<GpuGenerateCubinAccessorsPass> {
private:
@@ -59,79 +53,63 @@ private:
return LLVM::LLVMType::getIntNTy(llvmDialect, bits);
}
- FuncOp getMallocHelper(Location loc, Builder &builder) {
- FuncOp result = getModule().lookupSymbol<FuncOp>(kMallocHelperName);
- if (!result) {
- result = FuncOp::create(
- loc, kMallocHelperName,
- builder.getFunctionType(ArrayRef<Type>{getIndexType()},
- LLVM::LLVMType::getInt8PtrTy(llvmDialect)));
- getModule().push_back(result);
- }
- return result;
- }
-
- // Generates a function that returns a char array at runtime that contains the
- // data from blob. As there are currently no global constants, this uses a
- // sequence of store operations.
- // TODO(herhut): Use global constants instead.
- FuncOp generateCubinAccessor(Builder &builder, FuncOp &orig,
- StringAttr blob) {
+ // Inserts a global constant string containing `blob` into the parent module
+ // of `orig` and generates the function that returns the address of the first
+ // character of this string.
+ // TODO(herhut): consider fusing this pass with launch-func-to-cuda.
+ void generate(FuncOp orig, StringAttr blob) {
Location loc = orig.getLoc();
SmallString<128> nameBuffer(orig.getName());
+ auto module = orig.getParentOfType<ModuleOp>();
+ assert(module && "function must belong to a module");
+
+ // Create a global at the top of the module.
+ OpBuilder moduleBuilder(module.getBody(), module.getBody()->begin());
+ auto type = LLVM::LLVMType::getArrayTy(
+ LLVM::LLVMType::getInt8Ty(llvmDialect), blob.getValue().size());
+ nameBuffer.append(kCubinStorageSuffix);
+ auto cubinGlobalString = moduleBuilder.create<LLVM::GlobalOp>(
+ loc, type, /*isConstant=*/true, StringRef(nameBuffer), blob);
+
+ // Insert the getter function just after the original function.
+ moduleBuilder.setInsertionPoint(orig.getOperation()->getNextNode());
+ auto getterType = moduleBuilder.getFunctionType(
+ llvm::None, LLVM::LLVMType::getInt8PtrTy(llvmDialect));
+ // Drop the storage suffix before appending the getter suffix.
+ nameBuffer.resize(orig.getName().size());
nameBuffer.append(kCubinGetterSuffix);
- // Generate a function that returns void*.
- FuncOp result = FuncOp::create(
- loc, mlir::Identifier::get(nameBuffer, &getContext()),
- builder.getFunctionType(ArrayRef<Type>{},
- LLVM::LLVMType::getInt8PtrTy(llvmDialect)));
- // Insert a body block that just returns the constant.
- OpBuilder ob(result.getBody());
- ob.createBlock(&result.getBody());
- auto sizeConstant = ob.create<LLVM::ConstantOp>(
- loc, getIndexType(),
- builder.getIntegerAttr(builder.getIndexType(), blob.getValue().size()));
- auto memory =
- ob.create<LLVM::CallOp>(
- loc, ArrayRef<Type>{LLVM::LLVMType::getInt8PtrTy(llvmDialect)},
- builder.getSymbolRefAttr(getMallocHelper(loc, builder)),
- ArrayRef<Value *>{sizeConstant})
- .getResult(0);
- for (auto byte : llvm::enumerate(blob.getValue().bytes())) {
- auto index = ob.create<LLVM::ConstantOp>(
- loc, LLVM::LLVMType::getInt32Ty(llvmDialect),
- builder.getI32IntegerAttr(byte.index()));
- auto gep =
- ob.create<LLVM::GEPOp>(loc, LLVM::LLVMType::getInt8PtrTy(llvmDialect),
- memory, ArrayRef<Value *>{index});
- auto value = ob.create<LLVM::ConstantOp>(
- loc, LLVM::LLVMType::getInt8Ty(llvmDialect),
- builder.getIntegerAttr(builder.getIntegerType(8), byte.value()));
- ob.create<LLVM::StoreOp>(loc, value, gep);
- }
- ob.create<LLVM::ReturnOp>(loc, ArrayRef<Value *>{memory});
+ auto result = moduleBuilder.create<FuncOp>(
+ loc, StringRef(nameBuffer), getterType, ArrayRef<NamedAttribute>());
+ Block *entryBlock = result.addEntryBlock();
+
+ // Obtain the address of the first character of the global string containing
+ // the cubin and return from the getter (addressof will return [? x i8]*).
+ OpBuilder builder(entryBlock);
+ Value *cubinGlobalStringPtr =
+ builder.create<LLVM::AddressOfOp>(loc, cubinGlobalString);
+ Value *cst0 = builder.create<LLVM::ConstantOp>(
+ loc, getIndexType(), builder.getIntegerAttr(builder.getIndexType(), 0));
+ Value *startPtr = builder.create<LLVM::GEPOp>(
+ loc, LLVM::LLVMType::getInt8PtrTy(llvmDialect), cubinGlobalStringPtr,
+ ArrayRef<Value *>({cst0, cst0}));
+ builder.create<LLVM::ReturnOp>(loc, startPtr);
+
// Store the name of the getter on the function for easier lookup.
orig.setAttr(kCubinGetterAnnotation, builder.getSymbolRefAttr(result));
- return result;
}
public:
- // Run the dialect converter on the module.
+ // Perform the conversion on the module. This may insert globals, so it
+ // cannot be done on multiple functions in parallel.
void runOnModule() override {
llvmDialect =
getModule().getContext()->getRegisteredDialect<LLVM::LLVMDialect>();
- auto module = getModule();
- Builder builder(&getContext());
-
- auto functions = module.getOps<FuncOp>();
- for (auto it = functions.begin(); it != functions.end();) {
- // Move iterator to after the current function so that potential insertion
- // of the accessor is after the kernel with cubin iself.
- FuncOp orig = *it++;
- StringAttr cubinBlob = orig.getAttrOfType<StringAttr>(kCubinAnnotation);
+
+ for (auto func : getModule().getOps<FuncOp>()) {
+ StringAttr cubinBlob = func.getAttrOfType<StringAttr>(kCubinAnnotation);
if (!cubinBlob)
continue;
- module.insert(it, generateCubinAccessor(builder, orig, cubinBlob));
+ generate(func, cubinBlob);
}
}
OpenPOWER on IntegriCloud