Add gpu.shuffle op.

This will allow us to lower most of gpu.all_reduce (when all_reduce doesn't exist in the target dialect) within the GPU dialect, and only do target-specific lowering for the shuffle op.

PiperOrigin-RevId: 286548256
author: Christian Sigg <csigg@google.com> 2019-12-20 02:52:21 -0800
committer: A. Unique TensorFlower <gardener@tensorflow.org> 2019-12-20 02:52:52 -0800
commit: 42d46b4efa9d7b596c74c101a777f4464a8f1fd0 (patch)
tree: 1329db2e71549b610e2eb121528ea8b3284411e7 /mlir/include
parent: 7811ad3c2b312fb5eda5ed5f3a1d15b8e6085b24 (diff)
download: bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.tar.gz
bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.zip
2 files changed, 36 insertions, 0 deletions
diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 495238ffea6..93c0b13ee3e 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -26,6 +26,7 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/FunctionSupport.h"
 #include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/SymbolTable.h"
 
 namespace mlir {
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 46433c6edd5..6751f0a3f70 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -536,6 +536,41 @@ def GPU_AllReduceOp : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
+def GPU_ShuffleOpXor : StrEnumAttrCase<"xor">;
+
+def GPU_ShuffleModeAttr : StrEnumAttr<"ShuffleModeAttr",
+    "Indexing modes supported by gpu.shuffle.",
+    [
+      GPU_ShuffleOpXor,
+    ]>;
+
+def GPU_ShuffleOp : GPU_Op<"shuffle", [NoSideEffect]>,
+    Arguments<(ins AnyType:$value, I32:$offset, I32:$width,
+               GPU_ShuffleModeAttr:$mode)>,
+    Results<(outs AnyType:$result, I1:$valid)> {
+  let summary = "Shuffles values within a subgroup.";
+  let description = [{
+    The "shuffle" op moves values to a different invocation within the same
+    subgroup.
+
+    For example
+    ```
+      %1, %2 = gpu.shuffle %0, %offset, %width xor : f32
+    ```
+    for lane k returns the value from lane `k ^ offset` and `true` if that lane
+    is smaller than %width. Otherwise it returns an unspecified value and
+    `false`. A lane is the index of an invocation relative to its subgroup.
+
+    The width specifies the number of invocations that participate in the
+    shuffle. The width needs to be the same for all invocations that participate
+    in the shuffle. Exactly the first `width` invocations of a subgroup need to
+    execute this op in convergence.
+  }];
+  let verifier = [{ return ::verifyShuffleOp(*this); }];
+  let printer = [{ printShuffleOp(p, *this); }];
+  let parser = [{ return parseShuffleOp(parser, result); }];
+}
+
 def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
author	Christian Sigg <csigg@google.com>	2019-12-20 02:52:21 -0800
committer	A. Unique TensorFlower <gardener@tensorflow.org>	2019-12-20 02:52:52 -0800
commit	42d46b4efa9d7b596c74c101a777f4464a8f1fd0 (patch)
tree	1329db2e71549b610e2eb121528ea8b3284411e7 /mlir/include
parent	7811ad3c2b312fb5eda5ed5f3a1d15b8e6085b24 (diff)
download	bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.tar.gz bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.zip