summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--mlir/include/mlir/Dialect/LoopOps/LoopOps.td118
-rw-r--r--mlir/lib/Dialect/LoopOps/LoopOps.cpp197
-rw-r--r--mlir/test/Dialect/Loops/invalid.mlir181
-rw-r--r--mlir/test/Dialect/Loops/ops.mlir44
4 files changed, 536 insertions, 4 deletions
diff --git a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td
index 707b788aaa8..7025d466a3e 100644
--- a/mlir/include/mlir/Dialect/LoopOps/LoopOps.td
+++ b/mlir/include/mlir/Dialect/LoopOps/LoopOps.td
@@ -55,9 +55,11 @@ def ForOp : Loop_Op<"for",
the terminator, so will the parsing even in cases when it is absent from the
custom format. For example:
+ ```mlir
loop.for %iv = %lb to %ub step %step {
... // body
}
+ ```
}];
let arguments = (ins Index:$lowerBound, Index:$upperBound, Index:$step);
let regions = (region SizedRegion<1>:$region);
@@ -88,18 +90,22 @@ def IfOp : Loop_Op<"if",
conditionally executing two regions of code. The operand to an if operation
is a boolean value. The operation produces no results. For example:
+ ```mlir
loop.if %b {
...
} else {
...
}
+ ```
The 'else' block is optional, and may be omitted. For
example:
+ ```mlir
loop.if %b {
...
}
+ ```
}];
let arguments = (ins I1:$condition);
let regions = (region SizedRegion<1>:$thenRegion, AnyRegion:$elseRegion);
@@ -124,8 +130,114 @@ def IfOp : Loop_Op<"if",
}];
}
-def TerminatorOp :
- Loop_Op<"terminator", [NativeOpTrait<"IsTerminator">]> {
+def ParallelOp : Loop_Op<"parallel",
+ [SameVariadicOperandSize, SingleBlockImplicitTerminator<"TerminatorOp">]> {
+ let summary = "parallel for operation";
+ let description = [{
+ The "loop.parallel" operation represents a loop nest taking 3 groups of SSA
+ values as operands that represent the lower bounds, upper bounds and steps,
+ respectively. The operation defines a variadic number of SSA values for its
+ induction variables. It has one region capturing the loop body. The
+ induction variables are represented as an argument of this region. These SSA
+ values always have type index, which is the size of the machine word. The
+ steps are values of type index, required to be positive.
+ The lower and upper bounds specify a half-open range: the range includes the
+ lower bound but does not include the upper bound.
+
+ Semantically we require that the iteration space can be iterated in any
+ order, and the loop body can be executed in parallel. If there are data
+ races, the behavior is undefined.
+
+ The parallel loop operation supports reduction of values produced by
+ individual iterations into a single result. This is modeled using the
+ loop.reduce operation (see loop.reduce for details). Each result of a
+ loop.parallel operation is associated with a reduce operation that is an
+ immediate child. Reduces are matched to result values in order of their
+ appearance in the body. Consequently, we require that the body region has
+ the same number of results as it has reduce operations.
+
+ The body region must contain exactly one block that terminates with
+ "loop.terminator". Parsing ParallelOp will create such region and insert the
+ terminator when it is absent from the custom format. For example:
+
+ ```mlir
+ loop.parallel (%iv) = (%lb) to (%ub) step (%step) {
+ %zero = constant 0.0 : f32
+ loop.reduce(%zero) {
+ ^bb0(%lhs : f32, %rhs: f32):
+ %res = addf %lhs, %rhs : f32
+ loop.reduce.return %res : f32
+ } : f32
+ }
+ ```
+ }];
+
+ let arguments = (ins Variadic<Index>:$lowerBound,
+ Variadic<Index>:$upperBound,
+ Variadic<Index>:$step);
+ let results = (outs Variadic<AnyType>:$results);
+ let regions = (region SizedRegion<1>:$body);
+}
+
+def ReduceOp : Loop_Op<"reduce", [HasParent<"ParallelOp">]> {
+ let summary = "reduce operation for parallel for";
+ let description = [{
+ "loop.reduce" is an operation occuring inside "loop.parallel" operations. It
+ consists of one block with two arguments which have the same type as the
+ operand of "loop.reduce".
+
+ "loop.reduce" is used to model the value for reduction computations of a
+ "loop.parallel" operation. It has to appear as an immediate child of a
+ "loop.parallel" and is associated with a result value of its parent
+ operation.
+
+ Association is in the order of appearance in the body where the first result
+ of a parallel loop operation corresponds to the first "loop.reduce" in the
+ operation's body region. The reduce operation takes a single operand, which
+ is the value to be used in the reduction.
+
+ The reduce operation contains a region whose entry block expects two
+ arguments of the same type as the operand. As the iteration order of the
+ parallel loop and hence reduction order is unspecified, the result of
+ reduction may be non-deterministic unless the operation is associative and
+ commutative.
+
+ The result of the reduce operation's body must have the same type as the
+ operands and associated result value of the parallel loop operation.
+ Example:
+
+ ```mlir
+ %zero = constant 0.0 : f32
+ loop.reduce(%zero) {
+ ^bb0(%lhs : f32, %rhs: f32):
+ %res = addf %lhs, %rhs : f32
+ loop.reduce.return %res : f32
+ } : f32
+ ```
+
+ }];
+
+ let arguments = (ins AnyType:$operand);
+ let regions = (region SizedRegion<1>:$reductionOperator);
+}
+
+def ReduceReturnOp :
+ Loop_Op<"reduce.return", [HasParent<"ReduceOp">, Terminator]> {
+ let summary = "terminator for reduce operation";
+ let description = [{
+ "loop.reduce.return" is a special terminator operation for the block inside
+ "loop.reduce". It terminates the region. It should have the same type as the
+ operand of "loop.reduce". Example for the custom format:
+
+ ```mlir
+ loop.reduce.return %res : f32
+ ```
+ }];
+
+ let arguments = (ins AnyType:$result);
+}
+
+def TerminatorOp : Loop_Op<"terminator", [Terminator]> {
let summary = "cf terminator operation";
let description = [{
"loop.terminator" is a special terminator operation for blocks inside
@@ -133,7 +245,9 @@ def TerminatorOp :
syntax. However, `std` control operations omit the terminator in their
custom syntax for brevity.
+ ```mlir
loop.terminator
+ ```
}];
// No custom parsing/printing form.
diff --git a/mlir/lib/Dialect/LoopOps/LoopOps.cpp b/mlir/lib/Dialect/LoopOps/LoopOps.cpp
index 5452b3d4ab8..4824421d190 100644
--- a/mlir/lib/Dialect/LoopOps/LoopOps.cpp
+++ b/mlir/lib/Dialect/LoopOps/LoopOps.cpp
@@ -185,13 +185,13 @@ static ParseResult parseIfOp(OpAsmParser &parser, OperationState &result) {
return failure();
// Parse the 'then' region.
- if (parser.parseRegion(*thenRegion, {}, {}))
+ if (parser.parseRegion(*thenRegion, /*arguments=*/{}, /*argTypes=*/{}))
return failure();
IfOp::ensureTerminator(*thenRegion, parser.getBuilder(), result.location);
// If we find an 'else' keyword then parse the 'else' region.
if (!parser.parseOptionalKeyword("else")) {
- if (parser.parseRegion(*elseRegion, {}, {}))
+ if (parser.parseRegion(*elseRegion, /*arguments=*/{}, /*argTypes=*/{}))
return failure();
IfOp::ensureTerminator(*elseRegion, parser.getBuilder(), result.location);
}
@@ -222,6 +222,199 @@ static void print(OpAsmPrinter &p, IfOp op) {
}
//===----------------------------------------------------------------------===//
+// ParallelOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(ParallelOp op) {
+ // Check that there is at least one value in lowerBound, upperBound and step.
+ // It is sufficient to test only step, because it is ensured already that the
+ // number of elements in lowerBound, upperBound and step are the same.
+ Operation::operand_range stepValues = op.step();
+ if (stepValues.empty())
+ return op.emitOpError(
+ "needs at least one tuple element for lowerBound, upperBound and step");
+
+ // Check whether all constant step values are positive.
+ for (Value stepValue : stepValues)
+ if (auto cst = dyn_cast_or_null<ConstantIndexOp>(stepValue.getDefiningOp()))
+ if (cst.getValue() <= 0)
+ return op.emitOpError("constant step operand must be positive");
+
+ // Check that the body defines the same number of block arguments as the
+ // number of tuple elements in step.
+ Block *body = &op.body().front();
+ if (body->getNumArguments() != stepValues.size())
+ return op.emitOpError(
+ "expects the same number of induction variables as bound and step "
+ "values");
+ for (auto arg : body->getArguments())
+ if (!arg.getType().isIndex())
+ return op.emitOpError(
+ "expects arguments for the induction variable to be of index type");
+
+ // Check that the number of results is the same as the number of ReduceOps.
+ SmallVector<ReduceOp, 4> reductions(body->getOps<ReduceOp>());
+ if (op.results().size() != reductions.size())
+ return op.emitOpError(
+ "expects number of results to be the same as number of reductions");
+
+ // Check that the types of the results and reductions are the same.
+ for (auto resultAndReduce : llvm::zip(op.results(), reductions)) {
+ auto resultType = std::get<0>(resultAndReduce).getType();
+ auto reduceOp = std::get<1>(resultAndReduce);
+ auto reduceType = reduceOp.operand().getType();
+ if (resultType != reduceType)
+ return reduceOp.emitOpError()
+ << "expects type of reduce to be the same as result type: "
+ << resultType;
+ }
+ return success();
+}
+
+static ParseResult parseParallelOp(OpAsmParser &parser,
+ OperationState &result) {
+ auto &builder = parser.getBuilder();
+ // Parse an opening `(` followed by induction variables followed by `)`
+ SmallVector<OpAsmParser::OperandType, 4> ivs;
+ if (parser.parseRegionArgumentList(ivs, /*requiredOperandCount=*/-1,
+ OpAsmParser::Delimiter::Paren))
+ return failure();
+
+ // Parse loop bounds.
+ SmallVector<OpAsmParser::OperandType, 4> lower;
+ if (parser.parseEqual() ||
+ parser.parseOperandList(lower, ivs.size(),
+ OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(lower, builder.getIndexType(), result.operands))
+ return failure();
+
+ SmallVector<OpAsmParser::OperandType, 4> upper;
+ if (parser.parseKeyword("to") ||
+ parser.parseOperandList(upper, ivs.size(),
+ OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(upper, builder.getIndexType(), result.operands))
+ return failure();
+
+ // Parse step value.
+ SmallVector<OpAsmParser::OperandType, 4> steps;
+ if (parser.parseKeyword("step") ||
+ parser.parseOperandList(steps, ivs.size(),
+ OpAsmParser::Delimiter::Paren) ||
+ parser.resolveOperands(steps, builder.getIndexType(), result.operands))
+ return failure();
+
+ // Now parse the body.
+ Region *body = result.addRegion();
+ SmallVector<Type, 4> types(ivs.size(), builder.getIndexType());
+ if (parser.parseRegion(*body, ivs, types))
+ return failure();
+
+ // Parse attributes and optional results (in case there is a reduce).
+ if (parser.parseOptionalAttrDict(result.attributes) ||
+ parser.parseOptionalColonTypeList(result.types))
+ return failure();
+
+ // Add a terminator if none was parsed.
+ ForOp::ensureTerminator(*body, builder, result.location);
+
+ return success();
+}
+
+static void print(OpAsmPrinter &p, ParallelOp op) {
+ p << op.getOperationName() << " (";
+ p.printOperands(op.body().front().getArguments());
+ p << ") = (" << op.lowerBound() << ") to (" << op.upperBound() << ") step ("
+ << op.step() << ")";
+ p.printRegion(op.body(), /*printEntryBlockArgs=*/false);
+ p.printOptionalAttrDict(op.getAttrs());
+ if (!op.results().empty())
+ p << " : " << op.getResultTypes();
+}
+
+//===----------------------------------------------------------------------===//
+// ReduceOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(ReduceOp op) {
+ // The region of a ReduceOp has two arguments of the same type as its operand.
+ auto type = op.operand().getType();
+ Block &block = op.reductionOperator().front();
+ if (block.empty())
+ return op.emitOpError("the block inside reduce should not be empty");
+ if (block.getNumArguments() != 2 ||
+ llvm::any_of(block.getArguments(), [&](const BlockArgument &arg) {
+ return arg.getType() != type;
+ }))
+ return op.emitOpError() << "expects two arguments to reduce block of type "
+ << type;
+
+ // Check that the block is terminated by a ReduceReturnOp.
+ if (!isa<ReduceReturnOp>(block.getTerminator()))
+ return op.emitOpError("the block inside reduce should be terminated with a "
+ "'loop.reduce.return' op");
+
+ return success();
+}
+
+static ParseResult parseReduceOp(OpAsmParser &parser, OperationState &result) {
+ // Parse an opening `(` followed by the reduced value followed by `)`
+ OpAsmParser::OperandType operand;
+ if (parser.parseLParen() || parser.parseOperand(operand) ||
+ parser.parseRParen())
+ return failure();
+
+ // Now parse the body.
+ Region *body = result.addRegion();
+ if (parser.parseRegion(*body, /*arguments=*/{}, /*argTypes=*/{}))
+ return failure();
+
+ // And the type of the operand (and also what reduce computes on).
+ Type resultType;
+ if (parser.parseColonType(resultType) ||
+ parser.resolveOperand(operand, resultType, result.operands))
+ return failure();
+
+ return success();
+}
+
+static void print(OpAsmPrinter &p, ReduceOp op) {
+ p << op.getOperationName() << "(" << op.operand() << ") ";
+ p.printRegion(op.reductionOperator());
+ p << " : " << op.operand().getType();
+}
+
+//===----------------------------------------------------------------------===//
+// ReduceReturnOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(ReduceReturnOp op) {
+ // The type of the return value should be the same type as the type of the
+ // operand of the enclosing ReduceOp.
+ auto reduceOp = cast<ReduceOp>(op.getParentOp());
+ Type reduceType = reduceOp.operand().getType();
+ if (reduceType != op.result().getType())
+ return op.emitOpError() << "needs to have type " << reduceType
+ << " (the type of the enclosing ReduceOp)";
+ return success();
+}
+
+static ParseResult parseReduceReturnOp(OpAsmParser &parser,
+ OperationState &result) {
+ OpAsmParser::OperandType operand;
+ Type resultType;
+ if (parser.parseOperand(operand) || parser.parseColonType(resultType) ||
+ parser.resolveOperand(operand, resultType, result.operands))
+ return failure();
+
+ return success();
+}
+
+static void print(OpAsmPrinter &p, ReduceReturnOp op) {
+ p << op.getOperationName() << " " << op.result() << " : "
+ << op.result().getType();
+}
+
+//===----------------------------------------------------------------------===//
// TableGen'd op method definitions
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Loops/invalid.mlir b/mlir/test/Dialect/Loops/invalid.mlir
index e346ad9772e..42f9d401645 100644
--- a/mlir/test/Dialect/Loops/invalid.mlir
+++ b/mlir/test/Dialect/Loops/invalid.mlir
@@ -113,3 +113,184 @@ func @loop_if_illegal_block_argument(%arg0: i1) {
return
}
+// -----
+
+func @parallel_arguments_different_tuple_size(
+ %arg0: index, %arg1: index, %arg2: index) {
+ // expected-error@+1 {{custom op 'loop.parallel' expected 1 operands}}
+ loop.parallel (%i0) = (%arg0) to (%arg1, %arg2) step () {
+ }
+ return
+}
+
+// -----
+
+func @parallel_body_arguments_wrong_type(
+ %arg0: index, %arg1: index, %arg2: index) {
+ // expected-error@+1 {{'loop.parallel' op expects arguments for the induction variable to be of index type}}
+ "loop.parallel"(%arg0, %arg1, %arg2) ({
+ ^bb0(%i0: f32):
+ "loop.terminator"() : () -> ()
+ }): (index, index, index) -> ()
+ return
+}
+
+// -----
+
+func @parallel_body_wrong_number_of_arguments(
+ %arg0: index, %arg1: index, %arg2: index) {
+ // expected-error@+1 {{'loop.parallel' op expects the same number of induction variables as bound and step values}}
+ "loop.parallel"(%arg0, %arg1, %arg2) ({
+ ^bb0(%i0: index, %i1: index):
+ "loop.terminator"() : () -> ()
+ }): (index, index, index) -> ()
+ return
+}
+
+// -----
+
+func @parallel_no_tuple_elements() {
+ // expected-error@+1 {{'loop.parallel' op needs at least one tuple element for lowerBound, upperBound and step}}
+ loop.parallel () = () to () step () {
+ }
+ return
+}
+
+// -----
+
+func @parallel_step_not_positive(
+ %arg0: index, %arg1: index, %arg2: index, %arg3: index) {
+ // expected-error@+3 {{constant step operand must be positive}}
+ %c0 = constant 1 : index
+ %c1 = constant 0 : index
+ loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%c0, %c1) {
+ }
+ return
+}
+
+// -----
+
+func @parallel_fewer_results_than_reduces(
+ %arg0 : index, %arg1: index, %arg2: index) {
+ // expected-error@+1 {{expects number of results to be the same as number of reductions}}
+ loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+ %c0 = constant 1.0 : f32
+ loop.reduce(%c0) {
+ ^bb0(%lhs: f32, %rhs: f32):
+ loop.reduce.return %lhs : f32
+ } : f32
+ }
+ return
+}
+
+// -----
+
+func @parallel_more_results_than_reduces(
+ %arg0 : index, %arg1 : index, %arg2 : index) {
+ // expected-error@+1 {{expects number of results to be the same as number of reductions}}
+ %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+ } : f32
+
+ return
+}
+
+// -----
+
+func @parallel_different_types_of_results_and_reduces(
+ %arg0 : index, %arg1: index, %arg2: index) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg1) step (%arg2) {
+ // expected-error@+1 {{expects type of reduce to be the same as result type: 'f32'}}
+ loop.reduce(%arg0) {
+ ^bb0(%lhs: index, %rhs: index):
+ loop.reduce.return %lhs : index
+ } : index
+ } : f32
+ return
+}
+
+// -----
+
+func @top_level_reduce(%arg0 : f32) {
+ // expected-error@+1 {{expects parent op 'loop.parallel'}}
+ loop.reduce(%arg0) {
+ ^bb0(%lhs : f32, %rhs : f32):
+ loop.reduce.return %lhs : f32
+ } : f32
+ return
+}
+
+// -----
+
+func @reduce_empty_block(%arg0 : index, %arg1 : f32) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg0) step (%arg0) {
+ // expected-error@+1 {{the block inside reduce should not be empty}}
+ loop.reduce(%arg1) {
+ ^bb0(%lhs : f32, %rhs : f32):
+ } : f32
+ } : f32
+ return
+}
+
+// -----
+
+func @reduce_too_many_args(%arg0 : index, %arg1 : f32) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg0) step (%arg0) {
+ // expected-error@+1 {{expects two arguments to reduce block of type 'f32'}}
+ loop.reduce(%arg1) {
+ ^bb0(%lhs : f32, %rhs : f32, %other : f32):
+ loop.reduce.return %lhs : f32
+ } : f32
+ } : f32
+ return
+}
+
+// -----
+
+func @reduce_wrong_args(%arg0 : index, %arg1 : f32) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg0) step (%arg0) {
+ // expected-error@+1 {{expects two arguments to reduce block of type 'f32'}}
+ loop.reduce(%arg1) {
+ ^bb0(%lhs : f32, %rhs : i32):
+ loop.reduce.return %lhs : f32
+ } : f32
+ } : f32
+ return
+}
+
+
+// -----
+
+func @reduce_wrong_terminator(%arg0 : index, %arg1 : f32) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg0) step (%arg0) {
+ // expected-error@+1 {{the block inside reduce should be terminated with a 'loop.reduce.return' op}}
+ loop.reduce(%arg1) {
+ ^bb0(%lhs : f32, %rhs : f32):
+ "loop.terminator"(): () -> ()
+ } : f32
+ } : f32
+ return
+}
+
+// -----
+
+func @reduceReturn_wrong_type(%arg0 : index, %arg1: f32) {
+ %res = loop.parallel (%i0) = (%arg0) to (%arg0) step (%arg0) {
+ loop.reduce(%arg1) {
+ ^bb0(%lhs : f32, %rhs : f32):
+ %c0 = constant 1 : index
+ // expected-error@+1 {{needs to have type 'f32' (the type of the enclosing ReduceOp)}}
+ loop.reduce.return %c0 : index
+ } : f32
+ } : f32
+ return
+}
+
+// -----
+
+func @reduceReturn_not_inside_reduce(%arg0 : f32) {
+ "foo.region"() ({
+ // expected-error@+1 {{expects parent op 'loop.reduce'}}
+ loop.reduce.return %arg0 : f32
+ }): () -> ()
+ return
+}
diff --git a/mlir/test/Dialect/Loops/ops.mlir b/mlir/test/Dialect/Loops/ops.mlir
index a51438bc727..42994ad63b1 100644
--- a/mlir/test/Dialect/Loops/ops.mlir
+++ b/mlir/test/Dialect/Loops/ops.mlir
@@ -49,3 +49,47 @@ func @std_if_else(%arg0: i1, %arg1: f32) {
// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
// CHECK-NEXT: } else {
// CHECK-NEXT: %{{.*}} = addf %{{.*}}, %{{.*}} : f32
+
+func @std_parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
+ %arg3 : index, %arg4 : index) {
+ %step = constant 1 : index
+ loop.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
+ step (%arg4, %step) {
+ %min_cmp = cmpi "slt", %i0, %i1 : index
+ %min = select %min_cmp, %i0, %i1 : index
+ %max_cmp = cmpi "sge", %i0, %i1 : index
+ %max = select %max_cmp, %i0, %i1 : index
+ %red = loop.parallel (%i2) = (%min) to (%max) step (%i1) {
+ %zero = constant 0.0 : f32
+ loop.reduce(%zero) {
+ ^bb0(%lhs : f32, %rhs: f32):
+ %res = addf %lhs, %rhs : f32
+ loop.reduce.return %res : f32
+ } : f32
+ } : f32
+ }
+ return
+}
+// CHECK-LABEL: func @std_parallel_loop(
+// CHECK-SAME: %[[ARG0:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[ARG1:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[ARG2:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[ARG3:[A-Za-z0-9]+]]:
+// CHECK-SAME: %[[ARG4:[A-Za-z0-9]+]]:
+// CHECK: %[[STEP:.*]] = constant 1 : index
+// CHECK-NEXT: loop.parallel (%[[I0:.*]], %[[I1:.*]]) = (%[[ARG0]], %[[ARG1]]) to
+// CHECK: (%[[ARG2]], %[[ARG3]]) step (%[[ARG4]], %[[STEP]]) {
+// CHECK-NEXT: %[[MIN_CMP:.*]] = cmpi "slt", %[[I0]], %[[I1]] : index
+// CHECK-NEXT: %[[MIN:.*]] = select %[[MIN_CMP]], %[[I0]], %[[I1]] : index
+// CHECK-NEXT: %[[MAX_CMP:.*]] = cmpi "sge", %[[I0]], %[[I1]] : index
+// CHECK-NEXT: %[[MAX:.*]] = select %[[MAX_CMP]], %[[I0]], %[[I1]] : index
+// CHECK-NEXT: loop.parallel (%{{.*}}) = (%[[MIN]]) to (%[[MAX]]) step (%[[I1]]) {
+// CHECK-NEXT: %[[ZERO:.*]] = constant 0.000000e+00 : f32
+// CHECK-NEXT: loop.reduce(%[[ZERO]]) {
+// CHECK-NEXT: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32):
+// CHECK-NEXT: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32
+// CHECK-NEXT: loop.reduce.return %[[RES]] : f32
+// CHECK-NEXT: } : f32
+// CHECK-NEXT: "loop.terminator"() : () -> ()
+// CHECK-NEXT: } : f32
+// CHECK-NEXT: "loop.terminator"() : () -> ()
OpenPOWER on IntegriCloud