summary refs log tree commit diff stats
path: root/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp')
-rw-r--r-- mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp 32
1 files changed, 17 insertions, 15 deletions
diff --git a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
index 3ea1f85d62f..15633ac6d50 100644
--- a/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
+++ b/mlir/lib/Conversion/LoopsToGPU/LoopsToGPU.cpp
@@ -98,7 +98,7 @@ static Value getOrEmitUpperBound(ForOp forOp, OpBuilder &) {
// This roughly corresponds to the "matcher" part of the pattern-based
// rewriting infrastructure.
template <typename OpTy>
-LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) {
+static LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) {
Region &limit = forOp.region();
for (unsigned i = 0, e = numDims; i < e; ++i) {
Operation *nested = &forOp.getBody()->front();
@@ -124,8 +124,8 @@ LogicalResult checkLoopNestMappableImpl(OpTy forOp, unsigned numDims) {
}
template <typename OpTy>
-LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims,
- unsigned numThreadDims) {
+static LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims,
+ unsigned numThreadDims) {
if (numBlockDims < 1 || numThreadDims < 1) {
LLVM_DEBUG(llvm::dbgs() << "nothing to map");
return success();
@@ -142,8 +142,8 @@ LogicalResult checkLoopNestMappable(OpTy forOp, unsigned numBlockDims,
}
template <typename OpTy>
-LogicalResult checkLoopOpMappable(OpTy forOp, unsigned numBlockDims,
- unsigned numThreadDims) {
+static LogicalResult checkLoopOpMappable(OpTy forOp, unsigned numBlockDims,
+ unsigned numThreadDims) {
if (numBlockDims < 1 || numThreadDims < 1) {
LLVM_DEBUG(llvm::dbgs() << "nothing to map");
return success();
@@ -265,8 +265,8 @@ Optional<OpTy> LoopToGpuConverter::collectBounds(OpTy forOp,
/// `nids`. The innermost loop is mapped to the x-dimension, followed by the
/// next innermost loop to y-dimension, followed by z-dimension.
template <typename OpTy>
-OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids,
- ArrayRef<Value> nids) {
+static OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids,
+ ArrayRef<Value> nids) {
auto nDims = ids.size();
assert(nDims == nids.size());
for (auto dim : llvm::seq<unsigned>(0, nDims)) {
@@ -285,9 +285,10 @@ OpTy createGPULaunchLoops(OpTy rootForOp, ArrayRef<Value> ids,
/// Utility method to convert the gpu::KernelDim3 object for representing id of
/// each workgroup/workitem and number of workgroup/workitems along a dimension
/// of the launch into a container.
-void packIdAndNumId(gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids,
- unsigned nDims, SmallVectorImpl<Value> &ids,
- SmallVectorImpl<Value> &nids) {
+static void packIdAndNumId(gpu::KernelDim3 kernelIds,
+ gpu::KernelDim3 kernelNids, unsigned nDims,
+ SmallVectorImpl<Value> &ids,
+ SmallVectorImpl<Value> &nids) {
assert(nDims <= 3 && "invalid number of launch dimensions");
SmallVector<Value, 3> allIds = {kernelIds.z, kernelIds.y, kernelIds.x};
SmallVector<Value, 3> allNids = {kernelNids.z, kernelNids.y, kernelNids.x};
@@ -300,9 +301,9 @@ void packIdAndNumId(gpu::KernelDim3 kernelIds, gpu::KernelDim3 kernelNids,
/// Generate the body of the launch operation.
template <typename OpTy>
-LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp,
- gpu::LaunchOp launchOp, unsigned numBlockDims,
- unsigned numThreadDims) {
+static LogicalResult
+createLaunchBody(OpBuilder &builder, OpTy rootForOp, gpu::LaunchOp launchOp,
+ unsigned numBlockDims, unsigned numThreadDims) {
OpBuilder::InsertionGuard bodyInsertionGuard(builder);
builder.setInsertionPointToEnd(&launchOp.body().front());
auto returnOp = builder.create<gpu::ReturnOp>(launchOp.getLoc());
@@ -337,8 +338,9 @@ LogicalResult createLaunchBody(OpBuilder &builder, OpTy rootForOp,
// Convert the computation rooted at the `rootForOp`, into a GPU kernel with the
// given workgroup size and number of workgroups.
template <typename OpTy>
-LogicalResult createLaunchFromOp(OpTy rootForOp, ArrayRef<Value> numWorkGroups,
- ArrayRef<Value> workGroupSizes) {
+static LogicalResult createLaunchFromOp(OpTy rootForOp,
+ ArrayRef<Value> numWorkGroups,
+ ArrayRef<Value> workGroupSizes) {
OpBuilder builder(rootForOp.getOperation());
if (numWorkGroups.size() > 3) {
return rootForOp.emitError("invalid ")
OpenPOWER on IntegriCloud