diff options
Diffstat (limited to 'mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp')
-rw-r--r-- | mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp | 32 |
1 files changed, 17 insertions, 15 deletions
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp index 3ea1f85d62f..15633ac6d50 100644 --- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp +++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp @@ -98,7 +98,7 @@ static Value getOrEmitUpperBound(ForOp forOp, OpBuilder &) { // This roughly corresponds to the "matcher" part of the pattern-based // rewriting infrastructure. template <typename OpTy> -LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) { +static LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) { Region &limit = forOp.region(); for (unsigned i = 0, e = numDims; i < e; ++i) { Operation *nested = &forOp.getBody()->front(); @@ -124,8 +124,8 @@ LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) { } template <typename OpTy> -LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, - unsigned numThreadDims) { +static LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, + unsigned numThreadDims) { if (numBlockDims < 1 || numThreadDims < 1) { LLVM_DEBUG(llvm::dbgs() << "nothing to map"); return success(); @@ -142,8 +142,8 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims, } template <typename OpTy> -LogicalResult checkLoopOpMappable(OpTy forOp, unsigned numBlockDims, - unsigned numThreadDims) { +static LogicalResult checkLoopOpMappable(OpTy forOp, unsigned numBlockDims, + unsigned numThreadDims) { if (numBlockDims < 1 || numThreadDims < 1) { LLVM_DEBUG(llvm::dbgs() << "nothing to map"); return success(); @@ -265,8 +265,8 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp, /// `nids`. The innermost loop is mapped to the x-dimension, followed by the /// next innermost loop to y-dimension, followed by z-dimension. template <typename OpTy> -OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids, - ArrayRef<Value> nids) { +static OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids, + ArrayRef<Value> nids) { auto nDims = ids.size(); assert(nDims == nids.size()); for (auto dim : llvm::seq<unsigned>(0, nDims)) { @@ -285,9 +285,10 @@ OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids, /// Utility method to convert the gpu::KernelDim3 object for representing id of /// each workgroup/workitem and number of workgroup/workitems along a dimension /// of the launch into a container. -void packIdAndNumId(gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, - unsigned nDims, SmallVectorImpl<Value> &ids, - SmallVectorImpl<Value> &nids) { +static void packIdAndNumId(gpu::KernelDim3 kernelIds, + gpu::KernelDim3 kernelNids, unsigned nDims, + SmallVectorImpl<Value> &ids, + SmallVectorImpl<Value> &nids) { assert(nDims <= 3 && "invalid number of launch dimensions"); SmallVector<Value, 3> allIds = {kernelIds.z, kernelIds.y, kernelIds.x}; SmallVector<Value, 3> allNids = {kernelNids.z, kernelNids.y, kernelNids.x}; @@ -300,9 +301,9 @@ void packIdAndNumId(gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids, /// Generate the body of the launch operation. template <typename OpTy> -LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, - gpu::LaunchOp launchOp, unsigned numBlockDims, - unsigned numThreadDims) { +static LogicalResult +createLaunchBody(OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp, + unsigned numBlockDims, unsigned numThreadDims) { OpBuilder::InsertionGuard bodyInsertionGuard(builder); builder.setInsertionPointToEnd(&launchOp.body().front()); auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc()); @@ -337,8 +338,9 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp, // Convert the computation rooted at the `rootForOp`, into a GPU kernel with the // given workgroup size and number of workgroups. template <typename OpTy> -LogicalResult createLaunchFromOp(OpTy rootForOp, ArrayRef<Value> numWorkGroups, - ArrayRef<Value> workGroupSizes) { +static LogicalResult createLaunchFromOp(OpTy rootForOp, + ArrayRef<Value> numWorkGroups, + ArrayRef<Value> workGroupSizes) { OpBuilder builder(rootForOp.getOperation()); if (numWorkGroups.size() > 3) { return rootForOp.emitError("invalid ") |