diff options
| author | Alex Zinenko <zinenko@google.com> | 2018-12-06 11:34:27 -0800 |
|---|---|---|
| committer | jpienaar <jpienaar@google.com> | 2019-03-29 14:19:52 -0700 |
| commit | 7c89a225cfafef3dcf5de4991846de2d0e9dda06 (patch) | |
| tree | f1fa6bc1d2ff1afd92003b91b1a6bcbdaeb9f204 /mlir/lib/Transforms | |
| parent | 513d6d896ccf991ebc0acf85158d98e574f7b625 (diff) | |
| download | bcm5719-llvm-7c89a225cfafef3dcf5de4991846de2d0e9dda06.tar.gz bcm5719-llvm-7c89a225cfafef3dcf5de4991846de2d0e9dda06.zip | |
ConvertToCFG: support min/max in loop bounds.
The recently introduced `select` operation enables ConvertToCFG to support
min(max) in loop bounds. Individual min(max) is implemented as
`cmpi "lt"`(`cmpi "gt"`) followed by a `select` between the compared values.
Multiple results of an `affine_apply` operation extracted from the loop bounds
are reduced using min(max) in a sequential manner. While this may decrease the
potential for instruction-level parallelism, it is easier to recognize for the
following passes, in particular for the vectorizer.
PiperOrigin-RevId: 224376233
Diffstat (limited to 'mlir/lib/Transforms')
| -rw-r--r-- | mlir/lib/Transforms/ConvertToCFG.cpp | 41 |
1 files changed, 35 insertions, 6 deletions
diff --git a/mlir/lib/Transforms/ConvertToCFG.cpp b/mlir/lib/Transforms/ConvertToCFG.cpp index 852358f193a..5d17371cc1a 100644 --- a/mlir/lib/Transforms/ConvertToCFG.cpp +++ b/mlir/lib/Transforms/ConvertToCFG.cpp @@ -54,6 +54,9 @@ public: private: CFGValue *getConstantIndexValue(int64_t value); void visitStmtBlock(StmtBlock *stmtBlock); + CFGValue *buildMinMaxReductionSeq( + Location loc, CmpIPredicate predicate, + llvm::iterator_range<Operation::result_iterator> values); CFGFunction *cfgFunc; CFGFuncBuilder builder; @@ -123,6 +126,34 @@ void FunctionConverter::visitStmtBlock(StmtBlock *stmtBlock) { this->visit(&stmt); } +// Given a range of values, emit the code that reduces them with "min" or "max" +// depending on the provided comparison predicate. The predicate defines which +// comparison to perform, "lt" for "min", "gt" for "max" and is used for the +// `cmpi` operation followed by the `select` operation: +// +// %cond = cmpi "predicate" %v0, %v1 +// %result = select %cond, %v0, %v1 +// +// Multiple values are scanned in a linear sequence. This creates a data +// dependence that wouldn't exist in a tree reduction, but is easier to +// recognize as a reduction by the subsequent passes. +CFGValue *FunctionConverter::buildMinMaxReductionSeq( + Location loc, CmpIPredicate predicate, + llvm::iterator_range<Operation::result_iterator> values) { + assert(!llvm::empty(values) && "empty min/max chain"); + + auto valueIt = values.begin(); + CFGValue *value = cast<CFGValue>(*valueIt++); + for (; valueIt != values.end(); ++valueIt) { + auto cmpOp = builder.create<CmpIOp>(loc, predicate, value, *valueIt); + auto selectOp = + builder.create<SelectOp>(loc, cmpOp->getResult(), value, *valueIt); + value = cast<CFGValue>(selectOp->getResult()); + } + + return value; +} + // Convert a "for" loop to a flow of basic blocks. 
// // Create an SESE region for the loop (including its body) and append it to the @@ -235,15 +266,13 @@ void FunctionConverter::visitForStmt(ForStmt *forStmt) { functional::map(remapOperands, forStmt->getLowerBoundOperands()); auto lbAffineApply = builder.create<AffineApplyOp>( forStmt->getLoc(), forStmt->getLowerBoundMap(), operands); - // TODO(zinenko): support min/max in loop bounds; this requires min/max - // operations to be added to StandardOps first. - assert(lbAffineApply->getNumOperands() <= 1 && "NYI: min/max bounds"); - CFGValue *lowerBound = cast<CFGValue>(lbAffineApply->getResult(0)); + CFGValue *lowerBound = buildMinMaxReductionSeq( + forStmt->getLoc(), CmpIPredicate::SGT, lbAffineApply->getResults()); operands = functional::map(remapOperands, forStmt->getUpperBoundOperands()); auto ubAffineApply = builder.create<AffineApplyOp>( forStmt->getLoc(), forStmt->getUpperBoundMap(), operands); - assert(ubAffineApply->getNumOperands() <= 1 && "NYI: min/max bounds"); - CFGValue *upperBound = cast<CFGValue>(ubAffineApply->getResult(0)); + CFGValue *upperBound = buildMinMaxReductionSeq( + forStmt->getLoc(), CmpIPredicate::SLT, ubAffineApply->getResults()); builder.create<BranchOp>(builder.getUnknownLoc(), loopConditionBlock, lowerBound); |

