author    | Roman Tereshin <rtereshin@apple.com> | 2018-07-24 21:48:56 +0000
committer | Roman Tereshin <rtereshin@apple.com> | 2018-07-24 21:48:56 +0000
commit    | 1ba1f9310c26507bdeaa70695e4e5529b33e842d (patch)
tree      | 6778503ce98ac0f807dd0ddce31681974c9cc5f6 /llvm/lib/Analysis/ScalarEvolution.cpp
parent    | 5ddc0a2b149daf41a4df3d215555c96343326cf5 (diff)
[SCEV] Add zext(C + x + ...) -> D + zext(C-D + x + ...)<nuw><nsw> transform
if the top-level addition in (D + (C-D + x + ...)) can be proven not to
wrap, where the choice of D also maximizes the number of trailing
zeroes of (C-D + x + ...), ensuring homogeneous behaviour of the
transformation and better canonicalization of such expressions.
This enables better canonicalization of expressions like
1 + zext(5 + 20 * %x + 24 * %y) and
zext(6 + 20 * %x + 24 * %y)
which both get transformed to
2 + zext(4 + 20 * %x + 24 * %y)
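To make the choice of D in the example concrete, here is a minimal standalone sketch (not LLVM code; chooseD and the 32-bit arithmetic are assumptions for illustration). The non-constant terms 20 * %x and 24 * %y contribute min(ctz(20), ctz(24)) = 2 trailing zeroes, and D is the low 2 bits of the constant:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <initializer_list>

// Pick D as the low TZ bits of C, where TZ is the minimum number of
// trailing zeros across the coefficients of the non-constant terms.
// Uses GCC/Clang's __builtin_ctz; coefficients must be non-zero.
uint32_t chooseD(uint32_t C, std::initializer_list<uint32_t> Coeffs) {
  uint32_t TZ = 32;
  for (uint32_t Coeff : Coeffs)
    TZ = std::min<uint32_t>(TZ, __builtin_ctz(Coeff));
  return TZ < 32 ? C & ((1u << TZ) - 1) : C;
}

int main() {
  // 1 + zext(5 + 20*%x + 24*%y): D = 5 mod 4 = 1, residual constant 4,
  // so the outer constant becomes 1 + 1 = 2.
  assert(chooseD(5, {20, 24}) == 1);
  // zext(6 + 20*%x + 24*%y): D = 6 mod 4 = 2, residual constant 4.
  assert(chooseD(6, {20, 24}) == 2);
  // Both canonicalize to 2 + zext(4 + 20*%x + 24*%y).
}
```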
This pattern is common in address arithmetic, and the transformation
makes it easier for passes like LoadStoreVectorizer to prove that 2 or
more memory accesses are consecutive and optimize (vectorize) them.
Reviewed By: mzolotukhin
Differential Revision: https://reviews.llvm.org/D48853
llvm-svn: 337859
Diffstat (limited to 'llvm/lib/Analysis/ScalarEvolution.cpp')
-rw-r--r-- | llvm/lib/Analysis/ScalarEvolution.cpp | 38
1 file changed, 38 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 08f0d2939be..b2f12a12d1d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -1777,6 +1777,44 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
         Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1));
       return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1);
     }
+
+    // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...))<nuw>
+    // if D + (C - D + x + y + ...) could be proven to not unsigned wrap
+    // where D maximizes the number of trailing zeros of (C - D + x + y + ...)
+    //
+    // Useful while proving that address arithmetic expressions are equal or
+    // differ by a small constant amount, see LoadStoreVectorizer pass.
+    if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) {
+      // Often address arithmetics contain expressions like
+      // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))).
+      // ConstantRange is unable to prove that it's possible to transform
+      // (5 + (4 * X)) to (1 + (4 + (4 * X))) w/o underflowing:
+      //
+      // | Expression    | ConstantRange          | KnownBits             |
+      // |---------------|------------------------|-----------------------|
+      // | i8 4 * X      | [L: 0, U: 253)         | XXXX XX00             |
+      // |               |   => Min: 0, Max: 252  |   => Min: 0, Max: 252 |
+      // |               |                        |                       |
+      // | i8 4 * X + 5  | [L: 5, U: 2) (wrapped) | YYYY YY01             |
+      // |   (101)       |   => Min: 0, Max: 255  |   => Min: 1, Max: 253 |
+      //
+      // As KnownBits are not available for SCEV expressions, use number of
+      // trailing zeroes instead:
+      APInt C = SC->getAPInt();
+      uint32_t TZ = C.getBitWidth();
+      for (unsigned I = 1, E = SA->getNumOperands(); I < E && TZ; ++I)
+        TZ = std::min(TZ, GetMinTrailingZeros(SA->getOperand(I)));
+      if (TZ) {
+        APInt D = TZ < C.getBitWidth() ? C.trunc(TZ).zext(C.getBitWidth()) : C;
+        if (D != 0) {
+          const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth);
+          const SCEV *SResidual =
+              getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth);
+          const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1);
+          return getAddExpr(SZExtD, SZExtR, SCEV::FlagNUW, Depth + 1);
+        }
+      }
+    }
   }
 
   if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) {
```
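The FlagNUW on the final getAddExpr is sound because D keeps only the low TZ bits of C, while the residual (C - D + x + ...) has at least TZ trailing zeroes, so after zero-extension the two addends occupy disjoint low bits and their sum cannot carry. A small self-contained check of that bit-level fact (plain integers rather than SCEV; illustrative only, not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // With TZ = 2: D < 4 occupies the low two bits, and the residual R has
  // its low two bits clear. Disjoint bits mean addition equals bitwise OR,
  // so adding D can never carry into the residual's bits.
  const uint32_t TZ = 2;
  for (uint32_t D = 0; D < (1u << TZ); ++D)
    for (uint32_t R = 0; R <= 252; R += 1u << TZ)
      assert((D + R) == (D | R));
  return 0;
}
```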