diff options
| author | Christian Sigg <csigg@google.com> | 2019-12-20 02:52:21 -0800 |
|---|---|---|
| committer | A. Unique TensorFlower <gardener@tensorflow.org> | 2019-12-20 02:52:52 -0800 |
| commit | 42d46b4efa9d7b596c74c101a777f4464a8f1fd0 (patch) | |
| tree | 1329db2e71549b610e2eb121528ea8b3284411e7 /mlir/include | |
| parent | 7811ad3c2b312fb5eda5ed5f3a1d15b8e6085b24 (diff) | |
| download | bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.tar.gz bcm5719-llvm-42d46b4efa9d7b596c74c101a777f4464a8f1fd0.zip | |
Add gpu.shuffle op.
This will allow us to lower most of gpu.all_reduce (when all_reduce
doesn't exist in the target dialect) within the GPU dialect, and only do
target-specific lowering for the shuffle op.
PiperOrigin-RevId: 286548256
Diffstat (limited to 'mlir/include')
| -rw-r--r-- | mlir/include/mlir/Dialect/GPU/GPUDialect.h | 1 | ||||
| -rw-r--r-- | mlir/include/mlir/Dialect/GPU/GPUOps.td | 35 |
2 files changed, 36 insertions, 0 deletions
````diff
diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 495238ffea6..93c0b13ee3e 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -26,6 +26,7 @@
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/FunctionSupport.h"
 #include "mlir/IR/OpDefinition.h"
+#include "mlir/IR/OpImplementation.h"
 #include "mlir/IR/SymbolTable.h"
 
 namespace mlir {
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 46433c6edd5..6751f0a3f70 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -536,6 +536,41 @@ def GPU_AllReduceOp : GPU_Op<"all_reduce",
   let verifier = [{ return ::verifyAllReduce(*this); }];
 }
 
+def GPU_ShuffleOpXor : StrEnumAttrCase<"xor">;
+
+def GPU_ShuffleModeAttr : StrEnumAttr<"ShuffleModeAttr",
+    "Indexing modes supported by gpu.shuffle.",
+    [
+      GPU_ShuffleOpXor,
+    ]>;
+
+def GPU_ShuffleOp : GPU_Op<"shuffle", [NoSideEffect]>,
+    Arguments<(ins AnyType:$value, I32:$offset, I32:$width,
+               GPU_ShuffleModeAttr:$mode)>,
+    Results<(outs AnyType:$result, I1:$valid)> {
+  let summary = "Shuffles values within a subgroup.";
+  let description = [{
+    The "shuffle" op moves values to a different invocation within the same
+    subgroup.
+
+    For example
+    ```
+    %1, %2 = gpu.shuffle %0, %offset, %width xor : f32
+    ```
+    for lane k returns the value from lane `k ^ offset` and `true` if that lane
+    is smaller than %width. Otherwise it returns an unspecified value and
+    `false`. A lane is the index of an invocation relative to its subgroup.
+
+    The width specifies the number of invocations that participate in the
+    shuffle. The width needs to be the same for all invocations that participate
+    in the shuffle. Exactly the first `width` invocations of a subgroup need to
+    execute this op in convergence.
+  }];
+  let verifier = [{ return ::verifyShuffleOp(*this); }];
+  let printer = [{ printShuffleOp(p, *this); }];
+  let parser = [{ return parseShuffleOp(parser, result); }];
+}
+
 def GPU_BarrierOp : GPU_Op<"barrier"> {
   let summary = "Synchronizes all work items of a workgroup.";
   let description = [{
````

