summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTres Popp <tpopp@google.com>2020-01-14 11:09:59 +0100
committerStephan Herhut <herhut@google.com>2020-01-14 12:05:47 +0100
commit4624a1e8ac8a3f69cc887403b976f538f587744a (patch)
tree13cb3b1371abedefbdbd7e09933633acc4aca44c
parent9492e9d8cfd356109276da5aa926b297db0e16db (diff)
downloadbcm5719-llvm-4624a1e8ac8a3f69cc887403b976f538f587744a.tar.gz
bcm5719-llvm-4624a1e8ac8a3f69cc887403b976f538f587744a.zip
[mlir] Create a gpu.module operation for the GPU Dialect.
Summary: This is based on the use of code constantly checking for an attribute on a module and instead represents the distinct operation with a different op. Instead, this op can be used to provide better filtering. Reviewers: herhut, mravishankar, antiagainst, rriddle Reviewed By: herhut, antiagainst, rriddle Subscribers: liufengdb, aartbik, jholewinski, mgorny, mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, csigg, arpith-jacob, mgester, lucyrfox, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D72336
-rw-r--r--mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h11
-rw-r--r--mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h10
-rw-r--r--mlir/include/mlir/Dialect/GPU/GPUOps.td52
-rw-r--r--mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp21
-rw-r--r--mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp13
-rw-r--r--mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp21
-rw-r--r--mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt7
-rw-r--r--mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp55
-rw-r--r--mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp15
-rw-r--r--mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.td22
-rw-r--r--mlir/lib/Dialect/GPU/IR/GPUDialect.cpp55
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp22
-rw-r--r--mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir2
-rw-r--r--mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir6
-rw-r--r--mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir20
-rw-r--r--mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir8
-rw-r--r--mlir/test/Conversion/GPUToSPIRV/builtins.mlir12
-rw-r--r--mlir/test/Conversion/GPUToSPIRV/load-store.mlir2
-rw-r--r--mlir/test/Conversion/GPUToSPIRV/loop.mlir2
-rw-r--r--mlir/test/Conversion/GPUToSPIRV/simple.mlir2
-rw-r--r--mlir/test/Dialect/GPU/invalid.mlir11
-rw-r--r--mlir/test/Dialect/GPU/ops.mlir2
-rw-r--r--mlir/test/Dialect/GPU/outlining.mlir2
-rw-r--r--mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp2
24 files changed, 235 insertions, 140 deletions
diff --git a/mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h b/mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h
index 4eb6379adf6..f61e40ef5f9 100644
--- a/mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h
+++ b/mlir/include/mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h
@@ -19,12 +19,17 @@ namespace mlir {
class Location;
class ModuleOp;
+template <typename T>
+class OpPassBase;
+
+namespace gpu {
+class GPUModuleOp;
+} // namespace gpu
+
namespace LLVM {
class LLVMDialect;
} // namespace LLVM
-template <typename T> class OpPassBase;
-
using OwnedCubin = std::unique_ptr<std::vector<char>>;
using CubinGenerator =
std::function<OwnedCubin(const std::string &, Location, StringRef)>;
@@ -38,7 +43,7 @@ using CubinGenerator =
/// attached as a string attribute named 'nvvm.cubin' to the kernel function.
/// After the transformation, the body of the kernel function is removed (i.e.,
/// it is turned into a declaration).
-std::unique_ptr<OpPassBase<ModuleOp>>
+std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator);
/// Creates a pass to convert a gpu.launch_func operation into a sequence of
diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
index 75e4f7e374c..b3212279fab 100644
--- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
+++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -14,15 +14,19 @@ namespace mlir {
class LLVMTypeConverter;
class OwningRewritePatternList;
-class ModuleOp;
-template <typename OpT> class OpPassBase;
+template <typename OpT>
+class OpPassBase;
+
+namespace gpu {
+class GPUModuleOp;
+}
/// Collect a set of patterns to convert from the GPU dialect to NVVM.
void populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
OwningRewritePatternList &patterns);
/// Creates a pass that lowers GPU dialect operations to NVVM counterparts.
-std::unique_ptr<OpPassBase<ModuleOp>> createLowerGpuOpsToNVVMOpsPass();
+std::unique_ptr<OpPassBase<gpu::GPUModuleOp>> createLowerGpuOpsToNVVMOpsPass();
} // namespace mlir
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 766ddbf202c..3df6ff4be0c 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -588,4 +588,56 @@ def GPU_BarrierOp : GPU_Op<"barrier"> {
let printer = [{ p << getOperationName(); }];
}
+def GPU_GPUModuleOp : GPU_Op<"module", [
+ IsolatedFromAbove, SymbolTable, Symbol,
+ SingleBlockImplicitTerminator<"ModuleEndOp">
+]> {
+ let summary = "A top level compilation unit containing code to be run on a GPU.";
+ let description = [{
+ GPU module contains code that is intended to be run on a GPU. A host device
+ can launch this code through a gpu.launch_func that creates a fully
+ qualified symbol through the gpu.module's symbol and a gpu.func symbol
+ contained in the gpu.module.
+
+ The module's top-level scope is modeled by a single region with a single
+ block. GPU modules are required to have a name that is used for symbol
+ resolution by the gpu.launch_func operation.
+
+ Using an op with a region to define a GPU module enables "embedding" GPU
+ modules with SIMT execution models in other dialects in a clean manner and
+ allows filtering of code regions to execute passes on only code intended to
+ or not intended to be run on the separate device.
+
+ ```
+ gpu.module @symbol_name {
+ gpu.func {}
+ ...
+ gpu.module_end
+ }
+
+ ```
+ }];
+ let builders = [OpBuilder<"Builder *builder, OperationState &result, "
+ "StringRef name">];
+ let parser = [{ return ::parseGPUModuleOp(parser, result); }];
+ let printer = [{ return ::print(p, *this); }];
+ let regions = (region SizedRegion<1>:$body);
+
+ // We need to ensure the block inside the region is properly terminated;
+ // the auto-generated builders do not guarantee that.
+ let skipDefaultBuilders = 1;
+}
+
+def GPU_ModuleEndOp : GPU_Op<"module_end", [
+ Terminator, HasParent<"GPUModuleOp">
+]> {
+ let summary = "A pseudo op that marks the end of a gpu.module.";
+ let description = [{
+ This op terminates the only block inside the only region of a `gpu.module`.
+ }];
+
+ let parser = [{ return success(); }];
+ let printer = [{ p << getOperationName(); }];
+}
+
#endif // GPU_OPS
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
index 66a2e66f99a..b111c96313c 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp
@@ -46,18 +46,15 @@ static constexpr const char *kCubinAnnotation = "nvvm.cubin";
/// IR and further to PTX. A user provided CubinGenerator compiles the PTX to
/// GPU binary code, which is then attached as an attribute to the function. The
/// function body is erased.
-class GpuKernelToCubinPass : public ModulePass<GpuKernelToCubinPass> {
+class GpuKernelToCubinPass
+ : public OperationPass<GpuKernelToCubinPass, gpu::GPUModuleOp> {
public:
GpuKernelToCubinPass(
CubinGenerator cubinGenerator = compilePtxToCubinForTesting)
: cubinGenerator(cubinGenerator) {}
- void runOnModule() override {
- ModuleOp module = getModule();
- if (!module.getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName()) ||
- !module.getName())
- return;
+ void runOnOperation() override {
+ gpu::GPUModuleOp module = getOperation();
// Make sure the NVPTX target is initialized.
LLVMInitializeNVPTXTarget();
@@ -71,8 +68,8 @@ public:
// Translate the module to CUBIN and attach the result as attribute to the
// module.
- if (auto cubinAttr = translateGpuModuleToCubinAnnotation(
- *llvmModule, module.getLoc(), *module.getName()))
+ if (auto cubinAttr = translateGPUModuleToCubinAnnotation(
+ *llvmModule, module.getLoc(), module.getName()))
module.setAttr(kCubinAnnotation, cubinAttr);
else
signalPassFailure();
@@ -92,7 +89,7 @@ private:
StringRef name);
/// Translates llvmModule to cubin and returns the result as attribute.
- StringAttr translateGpuModuleToCubinAnnotation(llvm::Module &llvmModule,
+ StringAttr translateGPUModuleToCubinAnnotation(llvm::Module &llvmModule,
Location loc, StringRef name);
CubinGenerator cubinGenerator;
@@ -149,7 +146,7 @@ OwnedCubin GpuKernelToCubinPass::convertModuleToCubin(llvm::Module &llvmModule,
return cubinGenerator(ptx, loc, name);
}
-StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
+StringAttr GpuKernelToCubinPass::translateGPUModuleToCubinAnnotation(
llvm::Module &llvmModule, Location loc, StringRef name) {
auto cubin = convertModuleToCubin(llvmModule, loc, name);
if (!cubin)
@@ -157,7 +154,7 @@ StringAttr GpuKernelToCubinPass::translateGpuModuleToCubinAnnotation(
return StringAttr::get({cubin->data(), cubin->size()}, loc->getContext());
}
-std::unique_ptr<OpPassBase<ModuleOp>>
+std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
mlir::createConvertGPUKernelToCubinPass(CubinGenerator cubinGenerator) {
return std::make_unique<GpuKernelToCubinPass>(cubinGenerator);
}
diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
index 41f69d6e21d..31024d2881b 100644
--- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
+++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp
@@ -132,9 +132,9 @@ public:
// GPU kernel modules are no longer necessary since we have a global
// constant with the CUBIN data.
- for (auto m : llvm::make_early_inc_range(getModule().getOps<ModuleOp>()))
- if (m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
- m.erase();
+ for (auto m :
+ llvm::make_early_inc_range(getModule().getOps<gpu::GPUModuleOp>()))
+ m.erase();
}
private:
@@ -343,8 +343,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
builder.getI32IntegerAttr(0));
// Create an LLVM global with CUBIN extracted from the kernel annotation and
// obtain a pointer to the first byte in it.
- auto kernelModule =
- getModule().lookupSymbol<ModuleOp>(launchOp.getKernelModuleName());
+ auto kernelModule = getModule().lookupSymbol<gpu::GPUModuleOp>(
+ launchOp.getKernelModuleName());
assert(kernelModule && "expected a kernel module");
auto cubinAttr = kernelModule.getAttrOfType<StringAttr>(kCubinAnnotation);
@@ -354,8 +354,7 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls(
return signalPassFailure();
}
- assert(kernelModule.getName() && "expected a named module");
- SmallString<128> nameBuffer(*kernelModule.getName());
+ SmallString<128> nameBuffer(kernelModule.getName());
nameBuffer.append(kCubinStorageSuffix);
Value data = LLVM::createGlobalString(
loc, builder, nameBuffer.str(), cubinAttr.getValue(),
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index e2b1e0e533c..84bc7ff1d5f 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -200,7 +200,7 @@ private:
auto type = operand.getType().cast<LLVM::LLVMType>();
// Create shared memory array to store the warp reduction.
- auto module = operand.getDefiningOp()->getParentOfType<ModuleOp>();
+ auto module = operand.getDefiningOp()->getParentOfType<gpu::GPUModuleOp>();
assert(module && "op must belong to a module");
Value sharedMemPtr =
createSharedMemoryArray(loc, module, type, kWarpSize, rewriter);
@@ -391,10 +391,10 @@ private:
}
/// Creates a global array stored in shared memory.
- Value createSharedMemoryArray(Location loc, ModuleOp module,
+ Value createSharedMemoryArray(Location loc, gpu::GPUModuleOp module,
LLVM::LLVMType elementType, int numElements,
ConversionPatternRewriter &rewriter) const {
- OpBuilder builder(module.getBodyRegion());
+ OpBuilder builder(module.body());
auto arrayType = LLVM::LLVMType::getArrayTy(elementType, numElements);
StringRef name = "reduce_buffer";
@@ -699,13 +699,11 @@ struct GPUReturnOpLowering : public LLVMOpLowering {
///
/// This pass only handles device code and is not meant to be run on GPU host
/// code.
-class LowerGpuOpsToNVVMOpsPass : public ModulePass<LowerGpuOpsToNVVMOpsPass> {
+class LowerGpuOpsToNVVMOpsPass
+ : public OperationPass<LowerGpuOpsToNVVMOpsPass, gpu::GPUModuleOp> {
public:
- void runOnModule() override {
- ModuleOp m = getModule();
- if (!m.getAttrOfType<UnitAttr>(gpu::GPUDialect::getKernelModuleAttrName()))
- return;
-
+ void runOnOperation() override {
+ gpu::GPUModuleOp m = getOperation();
OwningRewritePatternList patterns;
NVVMTypeConverter converter(m.getContext());
populateStdToLLVMConversionPatterns(converter, patterns);
@@ -718,7 +716,7 @@ public:
target.addLegalDialect<LLVM::LLVMDialect>();
target.addLegalDialect<NVVM::NVVMDialect>();
// TODO(csigg): Remove once we support replacing non-root ops.
- target.addLegalOp<gpu::YieldOp>();
+ target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp, gpu::ModuleEndOp>();
if (failed(applyPartialConversion(m, target, patterns, &converter)))
signalPassFailure();
}
@@ -750,7 +748,8 @@ void mlir::populateGpuToNVVMConversionPatterns(
"__nv_exp");
}
-std::unique_ptr<OpPassBase<ModuleOp>> mlir::createLowerGpuOpsToNVVMOpsPass() {
+std::unique_ptr<OpPassBase<gpu::GPUModuleOp>>
+mlir::createLowerGpuOpsToNVVMOpsPass() {
return std::make_unique<LowerGpuOpsToNVVMOpsPass>();
}
diff --git a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt
index be82894461d..adeb4e099ab 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToSPIRV/CMakeLists.txt
@@ -1,8 +1,15 @@
+set(LLVM_TARGET_DEFINITIONS GPUToSPIRV.td)
+mlir_tablegen(GPUToSPIRV.cpp.inc -gen-rewriters)
+add_public_tablegen_target(MLIRGPUToSPIRVIncGen)
+
add_llvm_library(MLIRGPUtoSPIRVTransforms
ConvertGPUToSPIRV.cpp
ConvertGPUToSPIRVPass.cpp
)
+add_dependencies(MLIRGPUtoSPIRVTransforms
+ MLIRGPUToSPIRVIncGen)
+
target_link_libraries(MLIRGPUtoSPIRVTransforms
MLIRGPU
MLIRIR
diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
index 2fd8cedfd63..a90cea99be4 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRV.cpp
@@ -63,27 +63,13 @@ private:
SmallVector<int32_t, 3> workGroupSizeAsInt32;
};
-/// Pattern to convert a module with gpu.kernel_module attribute to a
-/// spv.module.
-class KernelModuleConversion final : public SPIRVOpLowering<ModuleOp> {
+/// Pattern to convert a gpu.module to a spv.module.
+class GPUModuleConversion final : public SPIRVOpLowering<gpu::GPUModuleOp> {
public:
- using SPIRVOpLowering<ModuleOp>::SPIRVOpLowering;
+ using SPIRVOpLowering<gpu::GPUModuleOp>::SPIRVOpLowering;
PatternMatchResult
- matchAndRewrite(ModuleOp moduleOp, ArrayRef<Value> operands,
- ConversionPatternRewriter &rewriter) const override;
-};
-
-/// Pattern to convert a module terminator op to a terminator of spv.module op.
-// TODO: Move this into DRR, but that requires ModuleTerminatorOp to be defined
-// in ODS.
-class KernelModuleTerminatorConversion final
- : public SPIRVOpLowering<ModuleTerminatorOp> {
-public:
- using SPIRVOpLowering<ModuleTerminatorOp>::SPIRVOpLowering;
-
- PatternMatchResult
- matchAndRewrite(ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
+ matchAndRewrite(gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override;
};
@@ -284,16 +270,12 @@ KernelFnConversion::matchAndRewrite(gpu::GPUFuncOp funcOp,
}
//===----------------------------------------------------------------------===//
-// ModuleOp with gpu.kernel_module.
+// ModuleOp with gpu.module.
//===----------------------------------------------------------------------===//
-PatternMatchResult KernelModuleConversion::matchAndRewrite(
- ModuleOp moduleOp, ArrayRef<Value> operands,
+PatternMatchResult GPUModuleConversion::matchAndRewrite(
+ gpu::GPUModuleOp moduleOp, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const {
- if (!moduleOp.getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName())) {
- return matchFailure();
- }
// TODO : Generalize this to account for different extensions,
// capabilities, extended_instruction_sets, other addressing models
// and memory models.
@@ -302,8 +284,8 @@ PatternMatchResult KernelModuleConversion::matchAndRewrite(
spirv::MemoryModel::GLSL450, spirv::Capability::Shader,
spirv::Extension::SPV_KHR_storage_buffer_storage_class);
// Move the region from the module op into the SPIR-V module.
- Region &spvModuleRegion = spvModule.getOperation()->getRegion(0);
- rewriter.inlineRegionBefore(moduleOp.getBodyRegion(), spvModuleRegion,
+ Region &spvModuleRegion = spvModule.body();
+ rewriter.inlineRegionBefore(moduleOp.body(), spvModuleRegion,
spvModuleRegion.begin());
// The spv.module build method adds a block with a terminator. Remove that
// block. The terminator of the module op in the remaining block will be
@@ -314,17 +296,6 @@ PatternMatchResult KernelModuleConversion::matchAndRewrite(
}
//===----------------------------------------------------------------------===//
-// ModuleTerminatorOp for gpu.kernel_module.
-//===----------------------------------------------------------------------===//
-
-PatternMatchResult KernelModuleTerminatorConversion::matchAndRewrite(
- ModuleTerminatorOp terminatorOp, ArrayRef<Value> operands,
- ConversionPatternRewriter &rewriter) const {
- rewriter.replaceOpWithNewOp<spirv::ModuleEndOp>(terminatorOp);
- return matchSuccess();
-}
-
-//===----------------------------------------------------------------------===//
// GPU return inside kernel functions to SPIR-V return.
//===----------------------------------------------------------------------===//
@@ -342,14 +313,18 @@ PatternMatchResult GPUReturnOpConversion::matchAndRewrite(
// GPU To SPIRV Patterns.
//===----------------------------------------------------------------------===//
+namespace {
+#include "GPUToSPIRV.cpp.inc"
+}
+
void mlir::populateGPUToSPIRVPatterns(MLIRContext *context,
SPIRVTypeConverter &typeConverter,
OwningRewritePatternList &patterns,
ArrayRef<int64_t> workGroupSize) {
+ populateWithGenerated(context, &patterns);
patterns.insert<KernelFnConversion>(context, typeConverter, workGroupSize);
patterns.insert<
- GPUReturnOpConversion, ForOpConversion, KernelModuleConversion,
- KernelModuleTerminatorConversion,
+ GPUReturnOpConversion, ForOpConversion, GPUModuleConversion,
LaunchConfigConversion<gpu::BlockDimOp, spirv::BuiltIn::WorkgroupSize>,
LaunchConfigConversion<gpu::BlockIdOp, spirv::BuiltIn::WorkgroupId>,
LaunchConfigConversion<gpu::GridDimOp, spirv::BuiltIn::NumWorkgroups>,
diff --git a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp
index 68392c36765..bc8273ec2a9 100644
--- a/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp
+++ b/mlir/lib/Conversion/GPUToSPIRV/ConvertGPUToSPIRVPass.cpp
@@ -60,15 +60,12 @@ void GPUToSPIRVPass::runOnModule() {
SmallVector<Operation *, 1> kernelModules;
OpBuilder builder(context);
- module.walk([&builder, &kernelModules](ModuleOp moduleOp) {
- if (moduleOp.getAttrOfType<UnitAttr>(
- gpu::GPUDialect::getKernelModuleAttrName())) {
- // For each kernel module (should be only 1 for now, but that is not a
- // requirement here), clone the module for conversion because the
- // gpu.launch function still needs the kernel module.
- builder.setInsertionPoint(moduleOp.getOperation());
- kernelModules.push_back(builder.clone(*moduleOp.getOperation()));
- }
+ module.walk([&builder, &kernelModules](gpu::GPUModuleOp moduleOp) {
+ // For each kernel module (should be only 1 for now, but that is not a
+ // requirement here), clone the module for conversion because the
+ // gpu.launch function still needs the kernel module.
+ builder.setInsertionPoint(moduleOp.getOperation());
+ kernelModules.push_back(builder.clone(*moduleOp.getOperation()));
});
SPIRVTypeConverter typeConverter;
diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.td b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.td
new file mode 100644
index 00000000000..cfe9d26273c
--- /dev/null
+++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.td
@@ -0,0 +1,22 @@
+//===-- GPUToSPIRV.td - GPU to SPIR-V Dialect Lowerings ----*- tablegen -*-===//
+//
+// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains patterns to lower GPU dialect ops to SPIR-V ops.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef CONVERT_GPU_TO_SPIRV
+#define CONVERT_GPU_TO_SPIRV
+
+include "mlir/Dialect/GPU/GPUOps.td"
+include "mlir/Dialect/SPIRV/SPIRVStructureOps.td"
+
+def : Pat<(GPU_ModuleEndOp), (SPV_ModuleEndOp)>;
+
+#endif // CONVERT_GPU_TO_SPIRV
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index e750d0fefff..dbca1fb003a 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -72,15 +72,10 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
// Check that `launch_func` refers to a well-formed GPU kernel module.
StringRef kernelModuleName = launchOp.getKernelModuleName();
- auto kernelModule = module.lookupSymbol<ModuleOp>(kernelModuleName);
+ auto kernelModule = module.lookupSymbol<GPUModuleOp>(kernelModuleName);
if (!kernelModule)
return launchOp.emitOpError()
<< "kernel module '" << kernelModuleName << "' is undefined";
- if (!kernelModule.getAttrOfType<UnitAttr>(
- GPUDialect::getKernelModuleAttrName()))
- return launchOp.emitOpError("module '")
- << kernelModuleName << "' is missing the '"
- << GPUDialect::getKernelModuleAttrName() << "' attribute";
// Check that `launch_func` refers to a well-formed kernel function.
StringRef kernelName = launchOp.kernel();
@@ -517,10 +512,9 @@ void LaunchFuncOp::build(Builder *builder, OperationState &result,
result.addOperands(kernelOperands);
result.addAttribute(getKernelAttrName(),
builder->getStringAttr(kernelFunc.getName()));
- auto kernelModule = kernelFunc.getParentOfType<ModuleOp>();
- if (Optional<StringRef> kernelModuleName = kernelModule.getName())
- result.addAttribute(getKernelModuleAttrName(),
- builder->getSymbolRefAttr(*kernelModuleName));
+ auto kernelModule = kernelFunc.getParentOfType<GPUModuleOp>();
+ result.addAttribute(getKernelModuleAttrName(),
+ builder->getSymbolRefAttr(kernelModule.getName()));
}
void LaunchFuncOp::build(Builder *builder, OperationState &result,
@@ -820,6 +814,47 @@ LogicalResult GPUFuncOp::verifyBody() {
return success();
}
+//===----------------------------------------------------------------------===//
+// GPUModuleOp
+//===----------------------------------------------------------------------===//
+
+void GPUModuleOp::build(Builder *builder, OperationState &result,
+ StringRef name) {
+ ensureTerminator(*result.addRegion(), *builder, result.location);
+ result.attributes.push_back(builder->getNamedAttr(
+ ::mlir::SymbolTable::getSymbolAttrName(), builder->getStringAttr(name)));
+}
+
+static ParseResult parseGPUModuleOp(OpAsmParser &parser,
+ OperationState &result) {
+ StringAttr nameAttr;
+ if (parser.parseSymbolName(nameAttr, SymbolTable::getSymbolAttrName(),
+ result.attributes))
+ return failure();
+
+ // If module attributes are present, parse them.
+ if (parser.parseOptionalAttrDictWithKeyword(result.attributes))
+ return failure();
+
+ // Parse the module body.
+ auto *body = result.addRegion();
+ if (parser.parseRegion(*body, None, None))
+ return failure();
+
+ // Ensure that this module has a valid terminator.
+ GPUModuleOp::ensureTerminator(*body, parser.getBuilder(), result.location);
+ return success();
+}
+
+static void print(OpAsmPrinter &p, GPUModuleOp op) {
+ p << op.getOperationName() << ' ';
+ p.printSymbolName(op.getName());
+ p.printOptionalAttrDictWithKeyword(op.getAttrs(),
+ {SymbolTable::getSymbolAttrName()});
+ p.printRegion(op.getOperation()->getRegion(0), /*printEntryBlockArgs=*/false,
+ /*printBlockTerminators=*/false);
+}
+
// Namespace avoids ambiguous ReturnOpOperandAdaptor.
namespace mlir {
namespace gpu {
diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
index 37f9c2e7b84..0f8e2253980 100644
--- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp
@@ -140,8 +140,8 @@ namespace {
/// inside a nested module. It also creates an external function of the same
/// name in the parent module.
///
-/// The kernel modules are intended to be compiled to a cubin blob independently
-/// in a separate pass. The external functions can then be annotated with the
+/// The gpu.modules are intended to be compiled to a cubin blob independently in
+/// a separate pass. The external functions can then be annotated with the
/// symbol of the cubin accessor function.
class GpuKernelOutliningPass : public ModulePass<GpuKernelOutliningPass> {
public:
@@ -174,15 +174,19 @@ public:
}
private:
- // Returns a module containing kernelFunc and all callees (recursive).
- ModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
- const SymbolTable &parentSymbolTable) {
+ // Returns a gpu.module containing kernelFunc and all callees (recursive).
+ gpu::GPUModuleOp createKernelModule(gpu::GPUFuncOp kernelFunc,
+ const SymbolTable &parentSymbolTable) {
+ // TODO: This code cannot use an OpBuilder because it must be inserted into
+ // a SymbolTable by the caller. SymbolTable needs to be refactored to
+ // prevent manual building of Ops with symbols in code using SymbolTables
+ // and then this needs to use the OpBuilder.
auto context = getModule().getContext();
Builder builder(context);
- auto kernelModule =
- ModuleOp::create(builder.getUnknownLoc(), kernelFunc.getName());
- kernelModule.setAttr(gpu::GPUDialect::getKernelModuleAttrName(),
- builder.getUnitAttr());
+ OperationState state(kernelFunc.getLoc(),
+ gpu::GPUModuleOp::getOperationName());
+ gpu::GPUModuleOp::build(&builder, state, kernelFunc.getName());
+ auto kernelModule = cast<gpu::GPUModuleOp>(Operation::create(state));
SymbolTable symbolTable(kernelModule);
symbolTable.insert(kernelFunc);
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
index 6865462595f..707f4a06395 100644
--- a/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-launch-func-to-cuda.mlir
@@ -5,7 +5,7 @@ module attributes {gpu.container_module} {
// CHECK: llvm.mlir.global internal constant @[[kernel_name:.*]]("kernel\00")
// CHECK: llvm.mlir.global internal constant @[[global:.*]]("CUBIN")
- module @kernel_module attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"} {
+ gpu.module @kernel_module attributes {nvvm.cubin = "CUBIN"} {
gpu.func @kernel(%arg0: !llvm.float, %arg1: !llvm<"float*">) attributes {gpu.kernel} {
gpu.return
}
diff --git a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
index 62fe2b99338..78b9f56b620 100644
--- a/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
+++ b/mlir/test/Conversion/GPUToCUDA/lower-nvvm-kernel-to-cubin.mlir
@@ -1,7 +1,7 @@
// RUN: mlir-opt %s --test-kernel-to-cubin -split-input-file | FileCheck %s
-// CHECK: attributes {gpu.kernel_module, nvvm.cubin = "CUBIN"}
-module @foo attributes {gpu.kernel_module} {
+// CHECK: attributes {nvvm.cubin = "CUBIN"}
+gpu.module @foo {
llvm.func @kernel(%arg0 : !llvm.float, %arg1 : !llvm<"float*">)
// CHECK: attributes {gpu.kernel}
attributes { gpu.kernel } {
@@ -11,7 +11,7 @@ module @foo attributes {gpu.kernel_module} {
// -----
-module @bar attributes {gpu.kernel_module} {
+gpu.module @bar {
// CHECK: func @kernel_a
llvm.func @kernel_a()
attributes { gpu.kernel } {
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 24bf56557c3..7f69cb7482c 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK-LABEL: func @gpu_index_ops()
func @gpu_index_ops()
attributes { gpu.kernel } {
@@ -38,7 +38,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK-LABEL: func @gpu_all_reduce_op()
func @gpu_all_reduce_op()
attributes { gpu.kernel } {
@@ -55,7 +55,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK-LABEL: func @gpu_all_reduce_region()
func @gpu_all_reduce_region()
attributes { gpu.kernel } {
@@ -74,7 +74,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK-LABEL: func @gpu_shuffle()
func @gpu_shuffle()
attributes { gpu.kernel } {
@@ -99,7 +99,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK-LABEL: func @gpu_sync()
func @gpu_sync()
attributes { gpu.kernel } {
@@ -111,7 +111,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK: llvm.func @__nv_fabsf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_fabs(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_fabs
@@ -126,7 +126,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK: llvm.func @__nv_ceilf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_ceil(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_ceil
@@ -141,7 +141,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK: llvm.func @__nv_cosf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_cos(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_cos
@@ -156,7 +156,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
// CHECK: llvm.func @__nv_exp(!llvm.double) -> !llvm.double
// CHECK-LABEL: func @gpu_exp
@@ -174,7 +174,7 @@ module attributes {gpu.kernel_module} {
// -----
// Test that we handled properly operation with SymbolTable other than module op
-module attributes {gpu.kernel_module} {
+gpu.module @test_module {
"test.symbol_scope"() ({
// CHECK: test.symbol_scope
// CHECK: llvm.func @__nv_expf(!llvm.float) -> !llvm.float
diff --git a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
index 69a16b25139..115c71d1280 100644
--- a/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/memory-attrbution.mlir
@@ -1,6 +1,6 @@
// RUN: mlir-opt --convert-gpu-to-nvvm --split-input-file %s | FileCheck %s
-module attributes {gpu.kernel_module} {
+gpu.module @kernel {
// CHECK-LABEL: llvm.func @private
gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, 5>) {
// Allocate private memory inside the function.
@@ -32,7 +32,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @kernel {
// Workgroup buffers are allocated as globals.
// CHECK: llvm.mlir.global internal @[[buffer:.*]]()
// CHECK-SAME: addr_space = 3
@@ -72,7 +72,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @kernel {
// Check that the total size was computed correctly.
// CHECK: llvm.mlir.global internal @[[buffer:.*]]()
// CHECK-SAME: addr_space = 3
@@ -113,7 +113,7 @@ module attributes {gpu.kernel_module} {
// -----
-module attributes {gpu.kernel_module} {
+gpu.module @kernel {
// Check that several buffers are defined.
// CHECK: llvm.mlir.global internal @[[buffer1:.*]]()
// CHECK-SAME: !llvm<"[1 x float]">
diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
index c0a68a9db2a..7f4081e4eda 100644
--- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir
@@ -9,7 +9,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_workgroup_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
@@ -32,7 +32,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_workgroup_id_y()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
@@ -55,7 +55,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPID:@.*]] built_in("WorkgroupId")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_workgroup_id_z()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPID]]
@@ -78,7 +78,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[WORKGROUPSIZE:@.*]] built_in("WorkgroupSize")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_workgroup_size_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[WORKGROUPSIZE]]
@@ -101,7 +101,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[LOCALINVOCATIONID:@.*]] built_in("LocalInvocationId")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_local_id_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[LOCALINVOCATIONID]]
@@ -124,7 +124,7 @@ module attributes {gpu.container_module} {
// CHECK-LABEL: spv.module "Logical" "GLSL450"
// CHECK: spv.globalVariable [[NUMWORKGROUPS:@.*]] built_in("NumWorkgroups")
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @builtin_num_workgroups_x()
attributes {gpu.kernel} {
// CHECK: [[ADDRESS:%.*]] = spv._address_of [[NUMWORKGROUPS]]
diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
index d104c96cfa9..446c0d602ed 100644
--- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir
@@ -16,7 +16,7 @@ module attributes {gpu.container_module} {
}
// CHECK-LABEL: spv.module "Logical" "GLSL450"
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
// CHECK-DAG: spv.globalVariable [[WORKGROUPSIZEVAR:@.*]] built_in("WorkgroupSize") : !spv.ptr<vector<3xi32>, Input>
// CHECK-DAG: spv.globalVariable [[NUMWORKGROUPSVAR:@.*]] built_in("NumWorkgroups") : !spv.ptr<vector<3xi32>, Input>
// CHECK-DAG: spv.globalVariable [[LOCALINVOCATIONIDVAR:@.*]] built_in("LocalInvocationId") : !spv.ptr<vector<3xi32>, Input>
diff --git a/mlir/test/Conversion/GPUToSPIRV/loop.mlir b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
index 6d38360b7e8..bd97315a2ea 100644
--- a/mlir/test/Conversion/GPUToSPIRV/loop.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/loop.mlir
@@ -7,7 +7,7 @@ module attributes {gpu.container_module} {
return
}
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @loop_kernel(%arg2 : memref<10xf32>, %arg3 : memref<10xf32>)
attributes {gpu.kernel} {
// CHECK: [[LB:%.*]] = spv.constant 4 : i32
diff --git a/mlir/test/Conversion/GPUToSPIRV/simple.mlir b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
index e1b687c1a0b..cca5eb9d0b4 100644
--- a/mlir/test/Conversion/GPUToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/GPUToSPIRV/simple.mlir
@@ -2,7 +2,7 @@
module attributes {gpu.container_module} {
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
// CHECK: spv.module "Logical" "GLSL450" {
// CHECK-LABEL: func @kernel_1
// CHECK-SAME: {{%.*}}: f32 {spv.interface_var_abi = {binding = 0 : i32, descriptor_set = 0 : i32, storage_class = 12 : i32{{[}][}]}}
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 8323fdf8709..8f900bf6b5c 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -167,7 +167,7 @@ module attributes {gpu.container_module} {
}
func @launch_func_missing_module_attribute(%sz : index) {
- // expected-error@+1 {{module 'kernels' is missing the 'gpu.kernel_module' attribute}}
+ // expected-error@+1 {{kernel module 'kernels' is undefined}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
{ kernel = "kernel_1", kernel_module = @kernels }
: (index, index, index, index, index, index) -> ()
@@ -178,8 +178,7 @@ module attributes {gpu.container_module} {
// -----
module attributes {gpu.container_module} {
- module @kernels attributes {gpu.kernel_module} {
- }
+ gpu.module @kernels { }
func @launch_func_undefined_function(%sz : index) {
// expected-error@+1 {{kernel function 'kernel_1' is undefined}}
@@ -193,7 +192,7 @@ module attributes {gpu.container_module} {
// -----
module attributes {gpu.container_module} {
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @kernel_1(%arg1 : !llvm<"float*">) kernel {
gpu.return
}
@@ -211,7 +210,7 @@ module attributes {gpu.container_module} {
// -----
module attributes {gpu.container_module} {
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
gpu.return
}
@@ -229,7 +228,7 @@ module attributes {gpu.container_module} {
// -----
-module @kernels attributes {gpu.kernel_module} {
+gpu.module @kernels {
gpu.func @kernel_1(%arg1 : !llvm<"float*">) attributes { gpu.kernel } {
gpu.return
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 1dd08cea492..033e7cbcb7e 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -60,7 +60,7 @@ module attributes {gpu.container_module} {
return
}
- module @kernels attributes {gpu.kernel_module} {
+ gpu.module @kernels {
gpu.func @kernel_1(%arg0 : f32, %arg1 : memref<?xf32, 1>) attributes {gpu.kernel} {
%tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index)
%tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index)
diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir
index 5adb881a1dc..425b4b3090c 100644
--- a/mlir/test/Dialect/GPU/outlining.mlir
+++ b/mlir/test/Dialect/GPU/outlining.mlir
@@ -136,7 +136,7 @@ func @recursive_device_function() {
gpu.return
}
-// CHECK: module @function_call_kernel attributes {gpu.kernel_module} {
+// CHECK: gpu.module @function_call_kernel {
// CHECK: gpu.func @function_call_kernel()
// CHECK: call @device_function() : () -> ()
// CHECK: call @device_function() : () -> ()
diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
index d6160d6d6e0..a05016f48e8 100644
--- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
+++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
@@ -105,7 +105,7 @@ static LogicalResult runMLIRPasses(ModuleOp m) {
applyPassManagerCLOptions(pm);
pm.addPass(createGpuKernelOutliningPass());
- auto &kernelPm = pm.nest<ModuleOp>();
+ auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
kernelPm.addPass(createLowerGpuOpsToNVVMOpsPass());
kernelPm.addPass(createConvertGPUKernelToCubinPass(&compilePtxToCubin));
pm.addPass(createLowerToLLVMPass());
OpenPOWER on IntegriCloud