diff options
-rw-r--r-- | polly/include/polly/ScopInfo.h | 1 | ||||
-rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 2 | ||||
-rw-r--r-- | polly/lib/Support/SCEVAffinator.cpp | 120 | ||||
-rw-r--r-- | polly/lib/Support/SCEVValidator.cpp | 18 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll | 4 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll | 15 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll | 9 | ||||
-rw-r--r-- | polly/test/ScopInfo/complex-successor-structure-3.ll | 8 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_load_zext_parameter.ll | 15 | ||||
-rw-r--r-- | polly/test/ScopInfo/modulo_zext_1.ll | 53 | ||||
-rw-r--r-- | polly/test/ScopInfo/modulo_zext_2.ll | 61 | ||||
-rw-r--r-- | polly/test/ScopInfo/modulo_zext_3.ll | 53 | ||||
-rw-r--r-- | polly/test/ScopInfo/multidim_only_ivs_3d_cast.ll | 21 | ||||
-rw-r--r-- | polly/test/ScopInfo/non-precise-inv-load-2.ll | 49 | ||||
-rw-r--r-- | polly/test/ScopInfo/ranged_parameter.ll | 2 | ||||
-rw-r--r-- | polly/test/ScopInfo/simple_loop_2.ll | 43 | ||||
-rw-r--r-- | polly/test/ScopInfo/user_provided_non_dominating_assumptions.ll | 4 |
17 files changed, 424 insertions, 54 deletions
diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 0b263a59e82..9d20e274e93 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -75,6 +75,7 @@ enum AssumptionKind { ALIASING, INBOUNDS, WRAPPING, + UNSIGNED, ERRORBLOCK, COMPLEXITY, INFINITELOOP, diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 09dc559cd2e..b741559178b 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3543,6 +3543,8 @@ static std::string toString(AssumptionKind Kind) { return "Inbounds"; case WRAPPING: return "No-overflows"; + case UNSIGNED: + return "Signed-unsigned"; case COMPLEXITY: return "Low complexity"; case ERRORBLOCK: diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp index 9f3d3011e42..4a008c3fef8 100644 --- a/polly/lib/Support/SCEVAffinator.cpp +++ b/polly/lib/Support/SCEVAffinator.cpp @@ -36,6 +36,14 @@ static cl::opt<bool> IgnoreIntegerWrapping( // compile time. static int const MaxConjunctsInPwAff = 100; +// The maximal number of bits for which a zero-extend is modeled precisely. +static unsigned const MaxZextSmallBitWidth = 7; + +/// @brief Return true if a zero-extend from @p Width bits is precisely modeled. +static bool isPreciseZeroExtend(unsigned Width) { + return Width <= MaxZextSmallBitWidth; +} + /// @brief Add the number of basic sets in @p Domain to @p User static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) { auto *NumBasicSets = static_cast<unsigned *>(User); @@ -82,6 +90,26 @@ static void combine(__isl_keep PWACtx &PWAC0, const __isl_take PWACtx &PWAC1, PWAC0.second = isl_set_union(PWAC0.second, PWAC1.second); } +/// @brief Set the possible wrapping of @p Expr to @p Flags. 
+static const SCEV *setNoWrapFlags(ScalarEvolution &SE, const SCEV *Expr, + SCEV::NoWrapFlags Flags) { + auto *NAry = dyn_cast<SCEVNAryExpr>(Expr); + if (!NAry) + return Expr; + + SmallVector<const SCEV *, 8> Ops(NAry->op_begin(), NAry->op_end()); + switch (Expr->getSCEVType()) { + case scAddExpr: + return SE.getAddExpr(Ops, Flags); + case scMulExpr: + return SE.getMulExpr(Ops, Flags); + case scAddRecExpr: + return SE.getAddRecExpr(Ops, cast<SCEVAddRecExpr>(Expr)->getLoop(), Flags); + default: + return Expr; + } +} + SCEVAffinator::SCEVAffinator(Scop *S, LoopInfo &LI) : S(S), Ctx(S->getIslCtx()), R(S->getRegion()), SE(*S->getSE()), LI(LI), TD(R.getEntry()->getParent()->getParent()->getDataLayout()) {} @@ -143,7 +171,7 @@ __isl_give PWACtx SCEVAffinator::checkForWrapping(const SCEV *Expr, __isl_give isl_pw_aff * SCEVAffinator::addModuloSemantic(__isl_take isl_pw_aff *PWA, Type *ExprType) const { - unsigned Width = TD.getTypeStoreSizeInBits(ExprType); + unsigned Width = TD.getTypeSizeInBits(ExprType); isl_ctx *Ctx = isl_pw_aff_get_ctx(PWA); isl_val *ModVal = isl_val_int_from_ui(Ctx, Width); @@ -245,13 +273,97 @@ SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) { __isl_give PWACtx SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - llvm_unreachable("SCEVZeroExtendExpr not yet supported"); + // A zero-extended value can be interpreted as a piecewise defined signed + // value. If the value was non-negative it stays the same, otherwise it + // is the sum of the original value and 2^n where n is the bit-width of + // the original (or operand) type. Examples: + // zext i8 127 to i32 -> { [127] } + // zext i8 -1 to i32 -> { [256 + (-1)] } = { [255] } + // zext i8 %v to i32 -> [v] -> { [v] | v >= 0; [256 + v] | v < 0 } + // + // However, LLVM/Scalar Evolution uses zero-extend (potentially led by a + // truncate) to represent some forms of modulo computation. 
The left-hand side + // of the condition in the code below would result in the SCEV + // "zext i1 <false, +, true>for.body" which is just another description + // of the C expression "i & 1 != 0" or, equivalently, "i % 2 != 0". + // + // for (i = 0; i < N; i++) + // if (i & 1 != 0 /* == i % 2 */) + // /* do something */ + // + // If we do not make the modulo explicit but only use the mechanism described + // above we will get the very restrictive assumption "N < 3", because for all + // values of N >= 3 the SCEVAddRecExpr operand of the zero-extend would wrap. + // Alternatively, we can make the modulo in the operand explicit in the + // resulting piecewise function and thereby avoid the assumption on N. For the + // example this would result in the following piecewise affine function: + // { [i0] -> [(1)] : 2*floor((-1 + i0)/2) = -1 + i0; + // [i0] -> [(0)] : 2*floor((i0)/2) = i0 } + // To this end we can first determine if the (immediate) operand of the + // zero-extend can wrap and, in case it might, we will use explicit modulo + // semantic to compute the result instead of emitting non-wrapping + // assumptions. + // + // Note that operands with large bit-widths are less likely to be negative + // because it would result in a very large access offset or loop bound after + // the zero-extend. To this end one can optimistically assume the operand to + // be positive and avoid the piecewise definition if the bit-width is bigger + // than some threshold (here MaxZextSmallBitWidth). + // + // We choose to go with a hybrid solution of all modeling techniques described + // above. For small bit-widths (up to MaxZextSmallBitWidth) we will model the + // wrapping explicitly and use a piecewise defined function. However, if the + // bit-width is bigger than MaxZextSmallBitWidth we will employ overflow + // assumptions and assume the "former negative" piece will not exist. 
+ + auto *Op = Expr->getOperand(); + unsigned Width = TD.getTypeSizeInBits(Op->getType()); + + bool Precise = isPreciseZeroExtend(Width); + + auto Flags = getNoWrapFlags(Op); + auto NoWrapFlags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + bool OpCanWrap = Precise && !(Flags & SCEV::FlagNSW); + if (OpCanWrap) + Op = setNoWrapFlags(SE, Op, NoWrapFlags); + + auto OpPWAC = visit(Op); + if (OpCanWrap) + OpPWAC.first = + addModuloSemantic(OpPWAC.first, Expr->getOperand()->getType()); + + // If the width is too big we assume the negative part does not occur. + if (!Precise) { + auto *NegOpPWA = isl_pw_aff_neg(isl_pw_aff_copy(OpPWAC.first)); + auto *NegDom = isl_pw_aff_pos_set(NegOpPWA); + auto *ExprDomain = BB ? S->getDomainConditions(BB) : nullptr; + NegDom = ExprDomain ? isl_set_intersect(NegDom, ExprDomain) : NegDom; + auto DL = BB ? BB->getTerminator()->getDebugLoc() : DebugLoc(); + OpPWAC.second = isl_set_union(OpPWAC.second, isl_set_copy(NegDom)); + S->addAssumption(UNSIGNED, isl_set_params(NegDom), DL, AS_RESTRICTION); + return OpPWAC; + } + + // If the width is small build the piece for the non-negative part and + // the one for the negative part and unify them. + auto *NonNegDom = isl_pw_aff_nonneg_set(isl_pw_aff_copy(OpPWAC.first)); + auto *NonNegPWA = isl_pw_aff_intersect_domain(isl_pw_aff_copy(OpPWAC.first), + isl_set_copy(NonNegDom)); + auto *WidthVal = isl_val_int_from_ui(isl_pw_aff_get_ctx(OpPWAC.first), Width); + auto *ExpVal = isl_val_2exp(WidthVal); + + auto ExpPWAC = getPWACtxFromPWA( + isl_pw_aff_val_on_domain(isl_set_complement(NonNegDom), ExpVal)); + combine(OpPWAC, ExpPWAC, isl_pw_aff_add); + + OpPWAC.first = + isl_pw_aff_coalesce(isl_pw_aff_union_add(NonNegPWA, OpPWAC.first)); + return OpPWAC; } __isl_give PWACtx SCEVAffinator::visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - // Assuming the value is signed, a sign extension is basically a noop. - // TODO: Reconsider this as soon as we support unsigned values. 
+ // As all values are represented as signed, a sign extension is a noop. return visit(Expr->getOperand()); } diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 1bd6015699e..b43b897959e 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -156,23 +156,7 @@ public: } class ValidatorResult visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - ValidatorResult Op = visit(Expr->getOperand()); - - switch (Op.getType()) { - case SCEVType::INT: - case SCEVType::PARAM: - // We currently do not represent a truncate expression as an affine - // expression. If it is constant during Scop execution, we treat it as a - // parameter. - return ValidatorResult(SCEVType::PARAM, Expr); - case SCEVType::IV: - DEBUG(dbgs() << "INVALID: ZeroExtend of SCEVType::IV expression"); - return ValidatorResult(SCEVType::INVALID); - case SCEVType::INVALID: - return Op; - } - - llvm_unreachable("Unknown SCEVType"); + return visit(Expr->getOperand()); } class ValidatorResult visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { diff --git a/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll b/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll index 8dab88e3a48..bbf267495d3 100644 --- a/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll +++ b/polly/test/Isl/CodeGen/invariant_load_parameters_cyclic_dependence.ll @@ -10,12 +10,12 @@ ; 2) To generate (A[N * M] / 2) [p1] the preloaded value is needed. 
; ; SCOP: p0: (%N * %M) -; SCOP: p1: (zext i32 (%tmp4 /u 2) to i64) +; SCOP: p1: (%tmp4 /u 2) ; ; CHECK: polly.preload.merge: ; CHECK: %polly.preload.tmp4.merge = phi i32 [ %polly.access.A.load, %polly.preload.exec ], [ 0, %polly.preload.cond ] ; CHECK: %3 = lshr i32 %polly.preload.tmp4.merge, 1 -; CHECK: %4 = zext i32 %3 to i64 +; CHECK: %4 = sext i32 %0 to i64 ; ; void f(int *restrict A, int *restrict B, int N, int M) { ; diff --git a/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll b/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll index e2bec4c9979..abbda0e1412 100644 --- a/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll +++ b/polly/test/Isl/CodeGen/run-time-condition-with-scev-parameters.ll @@ -1,12 +1,21 @@ +; RUN: opt %loadPolly -polly-ast -analyze < %s | FileCheck %s --check-prefix=AST ; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s +; TODO: FIXME: Simplify the context. +; AST: if (n >= 1 && 0 == n <= -1) + ; CHECK: entry: ; CHECK-NEXT: %0 = zext i32 %n to i64 ; CHECK: polly.split_new_and_old: -; CHECK-NEXT: %1 = icmp sge i64 %0, 1 -; CHECK-NEXT: br i1 %1, label %polly.start, label %for.body4 - +; CHECK-NEXT: %1 = sext i32 %n to i64 +; CHECK-NEXT: %2 = icmp sge i64 %1, 1 +; CHECK-NEXT: %3 = sext i32 %n to i64 +; CHECK-NEXT: %4 = icmp sle i64 %3, -1 +; CHECK-NEXT: %5 = sext i1 %4 to i64 +; CHECK-NEXT: %6 = icmp eq i64 0, %5 +; CHECK-NEXT: %7 = and i1 %2, %6 +; CHECK-NEXT: br i1 %7, label %polly.start, label %for.body4 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll b/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll index dd806fe9f7f..07c92e53444 100644 --- a/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll +++ b/polly/test/Isl/CodeGen/scop_never_executed_runtime_check_location.ll @@ -3,11 +3,12 @@ ; Verify that we generate the runtime check code after the 
conditional branch ; in the SCoP region entering block (here %entry). ; -; CHECK: entry: -; CHECK: %[[T0:[._a-zA-Z0-9]]] = zext i32 %n to i64 -; CHECK: br i1 false +; CHECK: entry: +; CHECK-NEXT: %0 = zext i32 %n to i64 +; CHECK-NEXT: br i1 false ; -; CHECK: %[[T1:[._a-zA-Z0-9]]] = icmp sge i64 %[[T0]], 1 +; CHECK: %[[T0:[._a-zA-Z0-9]]] = sext i32 %n to i64 +; CHECK: %[[T1:[._a-zA-Z0-9]]] = icmp sge i64 %[[T0]], 1 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/complex-successor-structure-3.ll b/polly/test/ScopInfo/complex-successor-structure-3.ll index 9cfc1eb0a5c..e9143b54baf 100644 --- a/polly/test/ScopInfo/complex-successor-structure-3.ll +++ b/polly/test/ScopInfo/complex-successor-structure-3.ll @@ -4,15 +4,15 @@ ; domains small and concise. ; ; CHECK: Assumed Context: -; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : } +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26] -> { : } ; CHECK-NEXT: Invalid Context: -; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : 1 = 0 } +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26] -> { : 1 = 0 } ; ; CHECK: Stmt_FINAL ; CHECK-NEXT: Domain := -; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] }; +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26] -> { Stmt_FINAL[] }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] -> [22] }; +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26] -> { Stmt_FINAL[] -> [22] }; ; ; ; void f(short *restrict In, int *restrict Out) { diff --git a/polly/test/ScopInfo/invariant_load_zext_parameter.ll b/polly/test/ScopInfo/invariant_load_zext_parameter.ll index 6728df5c3fb..3ffb2a47e1e 100644 --- 
a/polly/test/ScopInfo/invariant_load_zext_parameter.ll +++ b/polly/test/ScopInfo/invariant_load_zext_parameter.ll @@ -8,17 +8,22 @@ ; } ; } ; -; Check that even though the invariant load is some subpart of a parameter we -; will generate valid code and replace it by the preloaded value, e.g., to evaluate -; the execution context of the invariant access to I1. +; CHECK: Assumed Context: +; CHECK-NEXT: [loadI0] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [loadI0] -> { : loadI0 < 0 } ; -; CHECK: p0: (zext i32 %loadI0 to i64) +; CHECK: p0: %loadI0 +; +; CHECK: Stmt_if_then +; CHECK-NEXT: Domain := +; CHECK-NEXT: [loadI0] -> { Stmt_if_then[i0] : loadI0 = 0 and 0 <= i0 <= 999 }; ; ; CODEGEN: polly.preload.begin: ; CODEGEN-NEXT: %polly.access.I0 = getelementptr i32, i32* %I0, i64 0 ; CODEGEN-NEXT: %polly.access.I0.load = load i32, i32* %polly.access.I0 ; CODEGEN-NEXT: store i32 %polly.access.I0.load, i32* %loadI0.preload.s2a -; CODEGEN-NEXT: %0 = zext i32 %polly.access.I0.load to i64 +; CODEGEN-NEXT: %0 = sext i32 %polly.access.I0.load to i64 ; CODEGEN-NEXT: %1 = icmp eq i64 %0, 0 ; CODEGEN-NEXT: br label %polly.preload.cond ; diff --git a/polly/test/ScopInfo/modulo_zext_1.ll b/polly/test/ScopInfo/modulo_zext_1.ll new file mode 100644 index 00000000000..cbdb9ce40c7 --- /dev/null +++ b/polly/test/ScopInfo/modulo_zext_1.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : 1 = 0 } +; CHECK-NEXT: p0: %N +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[1] : 2*floor((-1 + i0)/2) = -1 + i0; Stmt_for_body[i0] -> MemRef_A[0] : 2*floor((i0)/2) = i0 
}; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[1] : 2*floor((-1 + i0)/2) = -1 + i0; Stmt_for_body[i0] -> MemRef_A[0] : 2*floor((i0)/2) = i0 }; +; CHECK-NEXT: } +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; A[i % 2]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc1, %for.inc ] + %cmp = icmp slt i32 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %i.t = trunc i32 %i.0 to i1 + %rem = zext i1 %i.t to i32 + %idxprom = sext i32 %rem to i64 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom + %tmp = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %tmp, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc1 = add nuw nsw i32 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/modulo_zext_2.ll b/polly/test/ScopInfo/modulo_zext_2.ll new file mode 100644 index 00000000000..642046fd07a --- /dev/null +++ b/polly/test/ScopInfo/modulo_zext_2.ll @@ -0,0 +1,61 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : 1 = 0 } +; CHECK-NEXT: p0: %N +; CHECK: Statements { +; CHECK-NEXT: Stmt_if_then +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_if_then[i0] : 2*floor((-1 + i0)/2) = -1 + i0 and 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_if_then[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_then[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_if_then[i0] -> MemRef_A[i0] 
}; +; CHECK-NEXT: } +; +; void f(int *A, int N) { +; for (int i = 0; i < N; i++) { +; if (i & 1) +; A[i]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32 %N) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp1 = trunc i64 %indvars.iv to i32 + %and = and i32 %tmp1, 1 + %tobool = icmp eq i32 %and, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp2 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %tmp2, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %for.body, %if.then + br label %for.inc + +for.inc: ; preds = %if.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/modulo_zext_3.ll b/polly/test/ScopInfo/modulo_zext_3.ll new file mode 100644 index 00000000000..b44c40ae570 --- /dev/null +++ b/polly/test/ScopInfo/modulo_zext_3.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : N >= 4294967297 } +; CHECK-NEXT: p0: %N +; CHECK: Statements { +; CHECK-NEXT: Stmt_for_body +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> [i0] }; +; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[i0] }; +; 
CHECK-NEXT: } +; +; void f(long *A, long N) { +; long K = /* 2^32 */ 4294967296; +; for (long i = 0; i < N; i++) { +; A[i % K]++; +; } +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i64* %A, i64 %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 0, %entry ], [ %inc1, %for.inc ] + %cmp = icmp slt i64 %i.0, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %i.t = trunc i64 %i.0 to i33 + %rem = zext i33 %i.t to i64 + %arrayidx = getelementptr inbounds i64, i64* %A, i64 %rem + %tmp = load i64, i64* %arrayidx, align 4 + %inc = add nsw i64 %tmp, 1 + store i64 %inc, i64* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %inc1 = add nuw nsw i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/multidim_only_ivs_3d_cast.ll b/polly/test/ScopInfo/multidim_only_ivs_3d_cast.ll index fffcf7c1b7c..fbe722e7361 100644 --- a/polly/test/ScopInfo/multidim_only_ivs_3d_cast.ll +++ b/polly/test/ScopInfo/multidim_only_ivs_3d_cast.ll @@ -8,36 +8,33 @@ ; A[i][j][k] = 1.0; ; } -; We currently fail to get the relation between the 32 and 64 bit versions of -; m and o, such that we generate unnecessary run-time checks. This is not a -; correctness issue, but could be improved. 
- ; CHECK: Assumed Context: -; CHECK-NEXT: [o, m, n, p_3, p_4] -> { : p_3 >= m and p_4 >= o } +; CHECK-NEXT: [o, m, n] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [o, m, n] -> { : o < 0 or m < 0 or (o >= 0 and m >= 0 and n <= 0) or (m = 0 and o >= 0 and n > 0) or (o = 0 and m > 0 and n > 0) } + ; ; CHECK: p0: %o ; CHECK-NEXT: p1: %m ; CHECK-NEXT: p2: %n -; CHECK-NEXT: p3: (zext i32 %m to i64) -; CHECK-NEXT: p4: (zext i32 %o to i64) -; CHECK-NOT: p5 +; CHECK-NOT: p3 ; ; CHECK: Arrays { ; CHECK-NEXT: double MemRef_A[*][(zext i32 %m to i64)][(zext i32 %o to i64)]; // Element size 8 ; CHECK-NEXT: } ; ; CHECK: Arrays (Bounds as pw_affs) { -; CHECK-NEXT: double MemRef_A[*][ [p_3] -> { [] -> [(p_3)] } ][ [p_4] -> { [] -> [(p_4)] } ]; // Element size 8 +; CHECK-NEXT: double MemRef_A[*][ [m] -> { [] -> [(m)] } ][ [o] -> { [] -> [(o)] } ]; // Element size 8 ; CHECK-NEXT: } ; ; CHECK: Statements { ; CHECK-NEXT: Stmt_for_k ; CHECK-NEXT: Domain := -; CHECK-NEXT: [o, m, n, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] : 0 <= i0 < n and 0 <= i1 < m and 0 <= i2 < o }; +; CHECK-NEXT: [o, m, n] -> { Stmt_for_k[i0, i1, i2] : 0 <= i0 < n and 0 <= i1 < m and 0 <= i2 < o }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [o, m, n, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] -> [i0, i1, i2] }; +; CHECK-NEXT: [o, m, n] -> { Stmt_for_k[i0, i1, i2] -> [i0, i1, i2] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: [o, m, n, p_3, p_4] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i1, i2] }; +; CHECK-NEXT: [o, m, n] -> { Stmt_for_k[i0, i1, i2] -> MemRef_A[i0, i1, i2] }; ; CHECK-NEXT: } target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/non-precise-inv-load-2.ll b/polly/test/ScopInfo/non-precise-inv-load-2.ll new file mode 100644 index 00000000000..b9a27cfb7ba --- /dev/null +++ 
b/polly/test/ScopInfo/non-precise-inv-load-2.ll @@ -0,0 +1,49 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [c] -> { Stmt_for_body[i0] -> MemRef_I[-1 + c] }; +; CHECK-NEXT: Execution Context: [c] -> { : c > 0 } +; CHECK-NEXT: } +; CHECK-NEXT: Context: +; CHECK-NEXT: [c] -> { : -128 <= c <= 127 } +; CHECK-NEXT: Assumed Context: +; CHECK-NEXT: [c] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [c] -> { : c <= 0 } +; +; void f(int *A, int *I, unsigned char c) { +; for (int i = 0; i < 10; i++) +; A[i] += I[c - (char)1]; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A, i32* %I, i8 zeroext %c) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %exitcond = icmp ne i64 %indvars.iv, 10 + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %sub = add i8 %c, -1 + %conv = zext i8 %sub to i64 + %arrayidx = getelementptr inbounds i32, i32* %I, i64 %conv + %tmp = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %tmp1, %tmp + store i32 %add, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/ranged_parameter.ll b/polly/test/ScopInfo/ranged_parameter.ll index 9dc3348f557..b59972c5090 100644 --- a/polly/test/ScopInfo/ranged_parameter.ll +++ b/polly/test/ScopInfo/ranged_parameter.ll @@ -4,7 +4,7 @@ ; range metadata (see bottom of the file) are present: ; ; CHECK: Context: -; CHECK: [p_0] -> { : 0 <= p_0 <= 255 } +; CHECK: [tmp] -> { : 0 <= tmp <= 255 } ; ; void jd(int *A, int *p 
/* in [0,256) */) { ; for (int i = 0; i < 1024; i++) diff --git a/polly/test/ScopInfo/simple_loop_2.ll b/polly/test/ScopInfo/simple_loop_2.ll new file mode 100644 index 00000000000..a3e08947687 --- /dev/null +++ b/polly/test/ScopInfo/simple_loop_2.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s + +; void f(int a[], int N) { +; int i; +; for (i = 0; i < N; ++i) +; a[i] = i; +; } + +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; +; CHECK: Arrays { +; CHECK-NEXT: i32 MemRef_a[*]; // Element size 4 +; CHECK-NEXT: } +; +; CHECK: Statements { +; CHECK-NEXT: Stmt_bb +; CHECK-NEXT: Domain := +; CHECK-NEXT: [N] -> { Stmt_bb[i0] : 0 <= i0 < N }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [N] -> { Stmt_bb[i0] -> [i0] }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_bb[i0] -> MemRef_a[i0] }; +; CHECK-NEXT: } + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @f(i32* nocapture %a, i64 %N) nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %i = phi i32 [ 0, %entry ], [ %i.inc, %bb ] + %scevgep = getelementptr inbounds i32, i32* %a, i32 %i + store i32 %i, i32* %scevgep + %i.inc = add nsw i32 %i, 1 + %i.ext = zext i32 %i.inc to i64 + %exitcond = icmp eq i64 %i.ext, %N + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/polly/test/ScopInfo/user_provided_non_dominating_assumptions.ll b/polly/test/ScopInfo/user_provided_non_dominating_assumptions.ll index 351ffe4697b..6e10313ce6b 100644 --- a/polly/test/ScopInfo/user_provided_non_dominating_assumptions.ll +++ b/polly/test/ScopInfo/user_provided_non_dominating_assumptions.ll @@ -1,8 +1,8 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops -disable-output < %s 2>&1 | FileCheck %s ; ; CHECK: remark: <unknown>:0:0: SCoP begins here. 
-; CHECK-NEXT: remark: <unknown>:0:0: Inbounds assumption: [i, N, p_2, M] -> { : N <= i or (N > i and p_2 <= 100) } -; CHECK-NEXT: remark: <unknown>:0:0: Inbounds assumption: [i, N, p_2, M] -> { : N <= i or (N > i and p_2 > 0 and M >= p_2) or (p_2 = 0 and N > i) } +; CHECK-NEXT: remark: <unknown>:0:0: Signed-unsigned restriction: [i, N, M] -> { : N >= i and M < 0 } +; CHECK-NEXT: remark: <unknown>:0:0: Inbounds assumption: [i, N, M] -> { : N <= i or (N > i and M <= 100) } ; CHECK-NEXT: remark: <unknown>:0:0: SCoP ends here. ; ; void f(int *restrict A, int *restrict B, int i, int N, int M, int C[100][100]) { |