diff options
| author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-05-12 15:12:43 +0000 |
|---|---|---|
| committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-05-12 15:12:43 +0000 |
| commit | 404a0f81ea47c743d75944b504bcc0a4dc81d5df (patch) | |
| tree | 7ee580a8732f39342b0a651dc3fce484691a6338 | |
| parent | 1168ca925cad2186772ff1692279846635baa1a0 (diff) | |
| download | bcm5719-llvm-404a0f81ea47c743d75944b504bcc0a4dc81d5df.tar.gz bcm5719-llvm-404a0f81ea47c743d75944b504bcc0a4dc81d5df.zip | |
Check overflows in RTCs and bail accordingly
We utilize assumptions on the input to model IR in polyhedral world.
To verify these assumptions we version the code and guard it with a
runtime-check (RTC). However, since the RTCs are themselves generated
from the polyhedral representation we generate them under the same
assumptions that they should verify. In other words, the guarantees
that we try to provide with the RTCs do not hold for the RTCs
themselves. To this end it is necessary to employ a different check
for the RTCs that will verify the assumptions did hold for them too.
Differential Revision: http://reviews.llvm.org/D20165
llvm-svn: 269299
| -rw-r--r-- | polly/include/polly/CodeGen/IslExprBuilder.h | 89 | ||||
| -rw-r--r-- | polly/lib/CodeGen/CodeGeneration.cpp | 9 | ||||
| -rw-r--r-- | polly/lib/CodeGen/IslExprBuilder.cpp | 134 | ||||
| -rw-r--r-- | polly/lib/CodeGen/IslNodeBuilder.cpp | 6 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll | 8 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/aliasing_multidimensional_access.ll | 28 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/aliasing_parametric_simple_1.ll | 11 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll | 22 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/exprModDiv.ll | 20 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll | 55 | ||||
| -rw-r--r-- | polly/test/Isl/CodeGen/no-overflow-tracking.ll | 73 |
11 files changed, 393 insertions, 62 deletions
diff --git a/polly/include/polly/CodeGen/IslExprBuilder.h b/polly/include/polly/CodeGen/IslExprBuilder.h index fad9308b593..3e112829e50 100644 --- a/polly/include/polly/CodeGen/IslExprBuilder.h +++ b/polly/include/polly/CodeGen/IslExprBuilder.h @@ -74,14 +74,12 @@ namespace polly { /// in the wild. Signed computations are needed, as loop bounds may become /// negative. /// -/// FIXME: Hardcoding sizes can cause issues: +/// It is possible to track overflows that occured in the generated IR. See the +/// description of @see OverflowState for more information. /// -/// a) Certain run-time checks that we may want to generate can involve the -/// size of the data types the computation is performed on. When code -/// generating these run-time checks to isl_ast_expr[essions], the -/// resulting computation may require more than 64 bit. +/// FIXME: Hardcoding sizes can cause issues: /// -/// b) On embedded systems and especially for high-level-synthesis 64 bit +/// - On embedded systems and especially for high-level-synthesis 64 bit /// computations are very costly. /// /// The right approach is to compute the minimal necessary bitwidth and @@ -89,11 +87,6 @@ namespace polly { /// to use this information in our IslAstGenerator. Preliminary patches are /// available, but have not been committed yet. /// -/// 2) We always flag computations with 'nsw' -/// -/// As isl_ast_expr[essions] assume arbitrary precision, no wrapping should -/// ever occur in the generated LLVM-IR (assuming the data type chosen is large -/// enough). class IslExprBuilder { public: /// @brief A map from isl_ids to llvm::Values. @@ -112,9 +105,7 @@ public: IslExprBuilder(Scop &S, PollyIRBuilder &Builder, IDToValueTy &IDToValue, ValueMapT &GlobalMap, const llvm::DataLayout &DL, llvm::ScalarEvolution &SE, llvm::DominatorTree &DT, - llvm::LoopInfo &LI) - : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), - DL(DL), SE(SE), DT(DT), LI(LI) {} + llvm::LoopInfo &LI); /// @brief Create LLVM-IR for an isl_ast_expr[ession]. /// @@ -140,9 +131,37 @@ public: /// @return The type with which the expression should be computed. llvm::IntegerType *getType(__isl_keep isl_ast_expr *Expr); + /// @brief Change if runtime overflows are tracked or not. + /// + /// @param Enable Flag to enable/disable the tracking. + /// + /// Note that this will reset the tracking state and that tracking is only + /// allowed if the last tracked expression dominates the current insert point. + void setTrackOverflow(bool Enable); + + /// @brief Return the current overflow status or nullptr if it is not tracked. + /// + /// @return A nullptr if tracking is disabled or otherwise an i1 that has the + /// value of "0" if and only if no overflow happened since tracking + /// was enabled. + llvm::Value *getOverflowState() const; + private: Scop &S; + /// @brief Flag that will be set if an overflow occurred at runtime. + /// + /// Note that this flag is by default a nullptr and if it is a nullptr + /// we will not record overflows but simply perform the computations. + /// The intended usage is as follows: + /// - If overflows in [an] expression[s] should be tracked, call + /// the setTrackOverflow(true) function. + /// - Use create(...) for all expressions that should be checked. + /// - Call getOverflowState() to get the value representing the current + /// state of the overflow flag. + /// - To stop tracking call setTrackOverflow(false). + llvm::Value *OverflowState; + PollyIRBuilder &Builder; IDToValueTy &IDToValue; ValueMapT &GlobalMap; @@ -165,6 +184,48 @@ private: llvm::Value *createInt(__isl_take isl_ast_expr *Expr); llvm::Value *createOpAddressOf(__isl_take isl_ast_expr *Expr); llvm::Value *createAccessAddress(__isl_take isl_ast_expr *Expr); + + /// @brief Create a binary operation @p Opc and track overflows if requested. + /// + /// @param OpC The binary operation that should be performed [Add/Sub/Mul]. + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the binary operation. + llvm::Value *createBinOp(llvm::BinaryOperator::BinaryOps Opc, + llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name); + + /// @brief Create an addition and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the addition. + llvm::Value *createAdd(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a subtraction and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the subtraction. + llvm::Value *createSub(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); + + /// @brief Create a multiplication and track overflows if requested. + /// + /// @param LHS The left operand. + /// @param RHS The right operand. + /// @param Name The (base) name of the new IR operations. + /// + /// @return A value that represents the result of the multiplication. + llvm::Value *createMul(llvm::Value *LHS, llvm::Value *RHS, + const llvm::Twine &Name = ""); }; } diff --git a/polly/lib/CodeGen/CodeGeneration.cpp b/polly/lib/CodeGen/CodeGeneration.cpp index c5c6527d911..757e24813cf 100644 --- a/polly/lib/CodeGen/CodeGeneration.cpp +++ b/polly/lib/CodeGen/CodeGeneration.cpp @@ -148,6 +148,7 @@ public: PollyIRBuilder Builder = createPollyIRBuilder(EnteringBB, Annotator); IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S); + IslExprBuilder &ExprBuilder = NodeBuilder.getExprBuilder(); // Only build the run-time condition and parameters _after_ having // introduced the conditional branch. This is important as the conditional @@ -191,7 +192,13 @@ public: NodeBuilder.addParameters(S.getContext()); - Value *RTC = buildRTC(Builder, NodeBuilder.getExprBuilder()); + ExprBuilder.setTrackOverflow(true); + Value *RTC = buildRTC(Builder, ExprBuilder); + Value *OverflowHappened = Builder.CreateNot( + ExprBuilder.getOverflowState(), "polly.rtc.overflown"); + RTC = Builder.CreateAnd(RTC, OverflowHappened, "polly.rtc.result"); + ExprBuilder.setTrackOverflow(false); + Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC); Builder.SetInsertPoint(&StartBlock->front()); diff --git a/polly/lib/CodeGen/IslExprBuilder.cpp b/polly/lib/CodeGen/IslExprBuilder.cpp index 912eb3984b6..b8c8e6557c9 100644 --- a/polly/lib/CodeGen/IslExprBuilder.cpp +++ b/polly/lib/CodeGen/IslExprBuilder.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "polly/CodeGen/IslExprBuilder.h" +#include "polly/Options.h" #include "polly/ScopInfo.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ScopHelper.h" @@ -19,6 +20,123 @@ using namespace llvm; using namespace polly; +/// @brief Different overflow tracking modes. +enum OverflowTrackingChoice { + OT_NEVER, ///< Never tack potential overflows. + OT_REQUEST, ///< Track potential overflows if requested. + OT_ALWAYS ///< Always track potential overflows. +}; + +static cl::opt<OverflowTrackingChoice> OTMode( + "polly-overflow-tracking", + cl::desc("Define where potential integer overflows in generated " + "expressions should be tracked."), + cl::values(clEnumValN(OT_NEVER, "never", "Never track the overflow bit."), + clEnumValN(OT_REQUEST, "request", + "Track the overflow bit if requested."), + clEnumValN(OT_ALWAYS, "always", + "Always track the overflow bit."), + clEnumValEnd), + cl::Hidden, cl::init(OT_REQUEST), cl::ZeroOrMore, cl::cat(PollyCategory)); + +IslExprBuilder::IslExprBuilder(Scop &S, PollyIRBuilder &Builder, + IDToValueTy &IDToValue, ValueMapT &GlobalMap, + const DataLayout &DL, ScalarEvolution &SE, + DominatorTree &DT, LoopInfo &LI) + : S(S), Builder(Builder), IDToValue(IDToValue), GlobalMap(GlobalMap), + DL(DL), SE(SE), DT(DT), LI(LI) { + OverflowState = (OTMode == OT_ALWAYS) ? Builder.getFalse() : nullptr; +} + +void IslExprBuilder::setTrackOverflow(bool Enable) { + // If potential overflows are tracked always or never we ignore requests + // to change the behaviour. + if (OTMode != OT_REQUEST) + return; + + if (Enable) { + // If tracking should be enabled initialize the OverflowState. + OverflowState = Builder.getFalse(); + } else { + // If tracking should be disabled just unset the OverflowState. + OverflowState = nullptr; + } +} + +Value *IslExprBuilder::getOverflowState() const { + // If the overflow tracking was requested but it is disabled we avoid the + // additional nullptr checks at the call sides but instead provide a + // meaningful result. + if (OTMode == OT_NEVER) + return Builder.getFalse(); + return OverflowState; +} + +Value *IslExprBuilder::createBinOp(BinaryOperator::BinaryOps Opc, Value *LHS, + Value *RHS, const Twine &Name) { + // Handle the plain operation (without overflow tracking) first. + if (!OverflowState) { + switch (Opc) { + case Instruction::Add: + return Builder.CreateNSWAdd(LHS, RHS, Name); + case Instruction::Sub: + return Builder.CreateNSWSub(LHS, RHS, Name); + case Instruction::Mul: + return Builder.CreateNSWMul(LHS, RHS, Name); + default: + llvm_unreachable("Unknown binary operator!"); + } + } + + Function *F = nullptr; + Module *M = Builder.GetInsertBlock()->getModule(); + switch (Opc) { + case Instruction::Add: + F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow, + {LHS->getType()}); + break; + case Instruction::Sub: + F = Intrinsic::getDeclaration(M, Intrinsic::ssub_with_overflow, + {LHS->getType()}); + break; + case Instruction::Mul: + F = Intrinsic::getDeclaration(M, Intrinsic::smul_with_overflow, + {LHS->getType()}); + break; + default: + llvm_unreachable("No overflow intrinsic for binary operator found!"); + } + + auto *ResultStruct = Builder.CreateCall(F, {LHS, RHS}, Name); + assert(ResultStruct->getType()->isStructTy()); + + auto *OverflowFlag = + Builder.CreateExtractValue(ResultStruct, 1, Name + ".obit"); + + // If all overflows are tracked we do not combine the results as this could + // cause dominance problems. Instead we will always keep the last overflow + // flag as current state. + if (OTMode == OT_ALWAYS) + OverflowState = OverflowFlag; + else + OverflowState = + Builder.CreateOr(OverflowState, OverflowFlag, "polly.overflow.state"); + + return Builder.CreateExtractValue(ResultStruct, 0, Name + ".res"); +} + +Value *IslExprBuilder::createAdd(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Add, LHS, RHS, Name); +} + +Value *IslExprBuilder::createSub(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Sub, LHS, RHS, Name); +} + +Value *IslExprBuilder::createMul(Value *LHS, Value *RHS, const Twine &Name) { + return createBinOp(Instruction::Mul, LHS, RHS, Name); +} + Type *IslExprBuilder::getWidestType(Type *T1, Type *T2) { assert(isa<IntegerType>(T1) && isa<IntegerType>(T2)); @@ -142,8 +260,7 @@ Value *IslExprBuilder::createAccessAddress(isl_ast_expr *Expr) { if (Ty != IndexOp->getType()) IndexOp = Builder.CreateIntCast(IndexOp, Ty, true); - IndexOp = - Builder.CreateAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); + IndexOp = createAdd(IndexOp, NextIndex, "polly.access.add." + BaseName); } // For every but the last dimension multiply the size, for the last @@ -167,8 +284,7 @@ Value *IslExprBuilder::createAccessAddress(isl_ast_expr *Expr) { if (Ty != DimSize->getType()) DimSize = Builder.CreateSExtOrTrunc(DimSize, Ty, "polly.access.sext." + BaseName); - IndexOp = - Builder.CreateMul(IndexOp, DimSize, "polly.access.mul." + BaseName); + IndexOp = createMul(IndexOp, DimSize, "polly.access.mul." + BaseName); } Access = Builder.CreateGEP(Base, IndexOp, "polly.access." + BaseName); @@ -237,13 +353,13 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) { default: llvm_unreachable("This is no binary isl ast expression"); case isl_ast_op_add: - Res = Builder.CreateNSWAdd(LHS, RHS); + Res = createAdd(LHS, RHS); break; case isl_ast_op_sub: - Res = Builder.CreateNSWSub(LHS, RHS); + Res = createSub(LHS, RHS); break; case isl_ast_op_mul: - Res = Builder.CreateNSWMul(LHS, RHS); + Res = createMul(LHS, RHS); break; case isl_ast_op_div: Res = Builder.CreateSDiv(LHS, RHS, "pexp.div", true); @@ -265,8 +381,8 @@ Value *IslExprBuilder::createOpBin(__isl_take isl_ast_expr *Expr) { // floord(n,d) ((n < 0) ? (n - d + 1) : n) / d Value *One = ConstantInt::get(MaxType, 1); Value *Zero = ConstantInt::get(MaxType, 0); - Value *Sum1 = Builder.CreateSub(LHS, RHS, "pexp.fdiv_q.0"); - Value *Sum2 = Builder.CreateAdd(Sum1, One, "pexp.fdiv_q.1"); + Value *Sum1 = createSub(LHS, RHS, "pexp.fdiv_q.0"); + Value *Sum2 = createAdd(Sum1, One, "pexp.fdiv_q.1"); Value *isNegative = Builder.CreateICmpSLT(LHS, Zero, "pexp.fdiv_q.2"); Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS, "pexp.fdiv_q.3"); diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp index 601a2f83278..ccec67ea547 100644 --- a/polly/lib/CodeGen/IslNodeBuilder.cpp +++ b/polly/lib/CodeGen/IslNodeBuilder.cpp @@ -1001,7 +1001,13 @@ Value *IslNodeBuilder::preloadInvariantLoad(const MemoryAccess &MA, isl_ast_expr *DomainCond = isl_ast_build_expr_from_set(Build, Domain); Domain = nullptr; + ExprBuilder.setTrackOverflow(true); Value *Cond = ExprBuilder.create(DomainCond); + Value *OverflowHappened = Builder.CreateNot(ExprBuilder.getOverflowState(), + "polly.preload.cond.overflown"); + Cond = Builder.CreateAnd(Cond, OverflowHappened, "polly.preload.cond.result"); + ExprBuilder.setTrackOverflow(false); + if (!Cond->getType()->isIntegerTy(1)) Cond = Builder.CreateIsNotNull(Cond); diff --git a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll index df362840430..74c0823b317 100644 --- a/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll +++ b/polly/test/Isl/CodeGen/OpenMP/new_multidim_access.ll @@ -19,15 +19,15 @@ ; CHECK: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 2i1] }; ; CHECK: new: [n, m] -> { Stmt_bb4[i0, i1] -> MemRef_A[i0, 43 + i1] }; -; IR: %polly.access.mul.polly.subfunc.arg.A = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %6 = add nsw i64 %polly.indvar5, 13 -; IR: %polly.access.add.polly.subfunc.arg.A = add i64 %polly.access.mul.polly.subfunc.arg.A, %6 +; IR: %polly.access.add.polly.subfunc.arg.A = add nsw i64 %polly.access.mul.polly.subfunc.arg.A, %6 ; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A ; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3 -; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m +; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul nsw i64 %polly.indvar, %polly.subfunc.arg.m ; IR: %7 = add nsw i64 %polly.indvar5, 43 -; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7 +; IR: %polly.access.add.polly.subfunc.arg.A9 = add nsw i64 %polly.access.mul.polly.subfunc.arg.A8, %7 ; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9 ; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/aliasing_multidimensional_access.ll b/polly/test/Isl/CodeGen/aliasing_multidimensional_access.ll index 919f1e77615..9048c7588f4 100644 --- a/polly/test/Isl/CodeGen/aliasing_multidimensional_access.ll +++ b/polly/test/Isl/CodeGen/aliasing_multidimensional_access.ll @@ -1,12 +1,28 @@ ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s ; -; Check that we calculate the maximal access into array A correctly. +; Check that we calculate the maximal access into array A correctly and track the overflow state. ; -; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = mul i64 99, %m -; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = add i64 %[[TMP0]], 149 -; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = mul i64 %[[TMP1]], %p -; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = add i64 %[[TMP2]], 150 -; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3]] +; CHECK: %[[TMP0:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 99, i64 %m) +; CHECK: %[[TMP0O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 1 +; CHECK: %[[OS0:[._0-9a-zA-Z]*]] = or i1 {{[^,]*}}, %[[TMP0O]] +; CHECK: %[[TMP0R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP0]], 0 +; CHECK: %[[TMP1:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP0R]], i64 149) +; CHECK: %[[TMP1O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 1 +; CHECK: %[[OS1:[._0-9a-zA-Z]*]] = or i1 %[[OS0]], %[[TMP1O]] +; CHECK: %[[TMP1R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP1]], 0 +; CHECK: %[[TMP2:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %[[TMP1R]], i64 %p) +; CHECK: %[[TMP2O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 1 +; CHECK: %[[OS2:[._0-9a-zA-Z]*]] = or i1 %[[OS1]], %[[TMP2O]] +; CHECK: %[[TMP2R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP2]], 0 +; CHECK: %[[TMP3:[._0-9a-zA-Z]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[TMP2R]], i64 150) +; CHECK: %[[TMP3O:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 1 +; CHECK: %[[OS3:[._0-9a-zA-Z]*]] = or i1 %[[OS2]], %[[TMP3O]] +; CHECK: %[[TMP3R:[._0-9a-zA-Z]*]] = extractvalue { i64, i1 } %[[TMP3]], 0 +; CHECK: %polly.access.A{{[0-9]*}} = getelementptr double, double* %A, i64 %[[TMP3R]] +; +; CHECK: %polly.rtc.overflown = xor i1 %[[OS3]], true +; CHECK: %polly.rtc.result = and i1 %{{[^,]*}}, %polly.rtc.overflown +; CHECK: br i1 %polly.rtc.result, ; ; void foo(long n, long m, long p, double A[n][m][p], int *B) { ; for (long i = 0; i < 100; i++) diff --git a/polly/test/Isl/CodeGen/aliasing_parametric_simple_1.ll b/polly/test/Isl/CodeGen/aliasing_parametric_simple_1.ll index 1d5ad9a0f17..eaf5c7f2e13 100644 --- a/polly/test/Isl/CodeGen/aliasing_parametric_simple_1.ll +++ b/polly/test/Isl/CodeGen/aliasing_parametric_simple_1.ll @@ -6,8 +6,11 @@ ; } ; ; CHECK: %[[Cext:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = add nsw i64 %[[Cext]], 1 -; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1]] +; CHECK: %[[Cp1:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %[[Cext]], i64 1) +; CHECK: %[[Cp1O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 1 +; CHECK: %[[OS:[._a-zA-Z0-9]*]] = or i1 false, %[[Cp1O]] +; CHECK: %[[Cp1R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[Cp1]], 0 +; CHECK: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[Cp1R]] ; CHECK: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 ; CHECK: %[[AMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMin]] to i64 @@ -19,7 +22,9 @@ ; CHECK: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK: %[[RTC:[._a-zA-Z0-9]*]] = and i1 true, %[[NoAlias]] -; CHECK: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS]], true +; CHECK: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll index 9d8a90ca4ee..24b52ce80bd 100644 --- a/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll +++ b/polly/test/Isl/CodeGen/aliasing_parametric_simple_2.ll @@ -7,9 +7,12 @@ ; ; CHECK: %[[Ctx:[._a-zA-Z0-9]*]] = and i1 true ; CHECK-NEXT: %[[M0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[M0]], 9 -; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3]] -; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3]] +; CHECK-NEXT: %[[M3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[M0]], i64 9) +; CHECK-NEXT: %[[M3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 1 +; CHECK-NEXT: %[[OS0:[._a-zA-Z0-9]*]] = or i1 false, %[[M3O]] +; CHECK-NEXT: %[[M3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[M3]], 0 +; CHECK-NEXT: %[[M1:[._a-zA-Z0-9]*]] = icmp sgt i64 6, %[[M3R]] +; CHECK-NEXT: %[[M4:[._a-zA-Z0-9]*]] = select i1 %[[M1]], i64 6, i64 %[[M3R]] ; CHECK-NEXT: %[[BMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[M4]] ; CHECK-NEXT: %[[AMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 0 ; CHECK-NEXT: %[[BMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMax]] to i64 @@ -17,16 +20,21 @@ ; CHECK-NEXT: %[[BltA:[._a-zA-Z0-9]*]] = icmp ule i64 %[[BMaxI]], %[[AMinI]] ; CHECK-NEXT: %[[AMax:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %A, i64 1024 ; CHECK-NEXT: %[[m0:[._a-zA-Z0-9]*]] = sext i32 %c to i64 -; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = sub nsw i64 %[[m0]], 10 -; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3]] -; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3]] +; CHECK-NEXT: %[[m3:[._a-zA-Z0-9]*]] = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %[[m0]], i64 10) +; CHECK-NEXT: %[[m3O:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 1 +; CHECK-NEXT: %[[OS1:[._a-zA-Z0-9]*]] = or i1 %[[OS0]], %[[m3O]] +; CHECK-NEXT: %[[m3R:[._a-zA-Z0-9]*]] = extractvalue { i64, i1 } %[[m3]], 0 +; CHECK-NEXT: %[[m1:[._a-zA-Z0-9]*]] = icmp slt i64 5, %[[m3R]] +; CHECK-NEXT: %[[m4:[._a-zA-Z0-9]*]] = select i1 %[[m1]], i64 5, i64 %[[m3R]] ; CHECK-NEXT: %[[BMin:[._a-zA-Z0-9]*]] = getelementptr i32, i32* %B, i64 %[[m4]] ; CHECK-NEXT: %[[AMaxI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[AMax]] to i64 ; CHECK-NEXT: %[[BMinI:[._a-zA-Z0-9]*]] = ptrtoint i32* %[[BMin]] to i64 ; CHECK-NEXT: %[[AltB:[._a-zA-Z0-9]*]] = icmp ule i64 %[[AMaxI]], %[[BMinI]] ; CHECK-NEXT: %[[NoAlias:[._a-zA-Z0-9]*]] = or i1 %[[BltA]], %[[AltB]] ; CHECK-NEXT: %[[RTC:[._a-zA-Z0-9]*]] = and i1 %[[Ctx]], %[[NoAlias]] -; CHECK-NEXT: br i1 %[[RTC]], label %polly.start, label %for.cond +; CHECK-NEXT: %[[NOV:[._a-zA-Z0-9]*]] = xor i1 %[[OS1]], true +; CHECK-NEXT: %[[RTCV:[._a-zA-Z0-9]*]] = and i1 %[[RTC]], %[[NOV]] +; CHECK-NEXT: br i1 %[[RTCV]], label %polly.start, label %for.cond ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/exprModDiv.ll b/polly/test/Isl/CodeGen/exprModDiv.ll index ceedaa09d92..43117819aa6 100644 --- a/polly/test/Isl/CodeGen/exprModDiv.ll +++ b/polly/test/Isl/CodeGen/exprModDiv.ll @@ -18,7 +18,7 @@ ; A[i % 127] ; CHECK: %pexp.pdiv_r = urem i64 %polly.indvar, 127 -; CHECK: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; CHECK: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 127)] ; @@ -28,41 +28,41 @@ ; each value of i to indeed be mapped to a value. ; ; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127 -; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; CHECK: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 127 * floord(-p - 1, 127) + 127] -; CHECK: %pexp.fdiv_q.0 = sub i64 %p, 127 -; CHECK: %pexp.fdiv_q.1 = add i64 %pexp.fdiv_q.0, 1 +; CHECK: %pexp.fdiv_q.0 = sub nsw i64 %p, 127 +; CHECK: %pexp.fdiv_q.1 = add nsw i64 %pexp.fdiv_q.0, 1 ; CHECK: %pexp.fdiv_q.2 = icmp slt i64 %p, 0 ; CHECK: %pexp.fdiv_q.3 = select i1 %pexp.fdiv_q.2, i64 %pexp.fdiv_q.1, i64 %p ; CHECK: %pexp.fdiv_q.4 = sdiv i64 %pexp.fdiv_q.3, 127 ; CHECK: %[[r1:[0-9]*]] = mul nsw i64 127, %pexp.fdiv_q.4 ; CHECK: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; CHECK: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 127] ; CHECK: %pexp.div = sdiv exact i64 %p, 127 -; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div ; A[i % 128] ; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128 -; POW2: %polly.access.A6 = getelementptr float, float* %A, i64 %pexp.pdiv_r +; POW2: %polly.access.A9 = getelementptr float, float* %A, i64 %pexp.pdiv_r ; A[floor(i / 128)] ; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128 -; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q +; POW2: %polly.access.B10 = getelementptr float, float* %B, i64 %pexp.p_div_q ; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d ; A[p + 128 * floord(-p - 1, 128) + 128] ; POW2: %polly.fdiv_q.shr = ashr i64 %p, 7 ; POW2: %[[r1:[0-9]*]] = mul nsw i64 128, %polly.fdiv_q.shr ; POW2: %[[r2:[0-9]*]] = sub nsw i64 %p, %[[r1]] -; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %[[r2]] +; POW2: %polly.access.A11 = getelementptr float, float* %A, i64 %[[r2]] ; A[p / 128] ; POW2: %pexp.div = sdiv exact i64 %p, 128 -; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div +; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll index cc93187d9f5..048b7fc383a 100644 --- a/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll +++ b/polly/test/Isl/CodeGen/invariant_load_base_pointer_conditional_2.ll @@ -1,5 +1,6 @@ ; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s ; RUN: opt %loadPolly -S -polly-codegen < %s | FileCheck %s --check-prefix=IR +; RUN: opt %loadPolly -S -polly-codegen --polly-overflow-tracking=always < %s | FileCheck %s --check-prefix=IRA ; ; As (p + q) can overflow we have to check that we load from ; I[p + q] only if it does not. @@ -20,23 +21,61 @@ ; IR-NEXT: %13 = icmp sge i64 %12, 1 ; IR-NEXT: %14 = sext i32 %q to i64 ; IR-NEXT: %15 = sext i32 %p to i64 -; IR-NEXT: %16 = add nsw i64 %15, %14 -; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %16 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %15, i64 %14) +; IR-NEXT: %.obit4 = extractvalue { i64, i1 } %16, 1 +; IR-NEXT: %polly.overflow.state5 = or i1 false, %.obit4 +; IR-NEXT: %.res6 = extractvalue { i64, i1 } %16, 0 +; IR-NEXT: %17 = icmp sle i64 %.res6, 2147483647 ; IR-NEXT: %18 = and i1 %13, %17 ; IR-NEXT: %19 = sext i32 %q to i64 ; IR-NEXT: %20 = sext i32 %p to i64 -; IR-NEXT: %21 = add nsw i64 %20, %19 -; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %21 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %20, i64 %19) +; IR-NEXT: %.obit7 = extractvalue { i64, i1 } %21, 1 +; IR-NEXT: %polly.overflow.state8 = or i1 %polly.overflow.state5, %.obit7 +; IR-NEXT: %.res9 = extractvalue { i64, i1 } %21, 0 +; IR-NEXT: %22 = icmp sge i64 %.res9, -2147483648 ; IR-NEXT: %23 = and i1 %18, %22 -; IR-NEXT: br label %polly.preload.cond1 +; IR-NEXT: %polly.preload.cond.overflown10 = xor i1 %polly.overflow.state8, true +; IR-NEXT: %polly.preload.cond.result11 = and i1 %23, %polly.preload.cond.overflown10 +; IR-NEXT: br label %polly.preload.cond12 ; -; IR: polly.preload.cond1: -; IR-NEXT: br i1 %23 +; IR: polly.preload.cond12: +; IR-NEXT: br i1 %polly.preload.cond.result11 ; -; IR: polly.preload.exec3: +; IR: polly.preload.exec14: ; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 ; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 ; +; IRA: polly.preload.merge: +; IRA-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ] +; IRA-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a +; IRA-NEXT: %12 = sext i32 %N to i64 +; IRA-NEXT: %13 = icmp sge i64 %12, 1 +; IRA-NEXT: %14 = sext i32 %q to i64 +; IRA-NEXT: %15 = sext i32 %p to i64 +; IRA-NEXT: %16 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %15, i64 %14) +; IRA-NEXT: %.obit5 = extractvalue { i64, i1 } %16, 1 +; IRA-NEXT: %.res6 = extractvalue { i64, i1 } %16, 0 +; IRA-NEXT: %17 = icmp sle i64 %.res6, 2147483647 +; IRA-NEXT: %18 = and i1 %13, %17 +; IRA-NEXT: %19 = sext i32 %q to i64 +; IRA-NEXT: %20 = sext i32 %p to i64 +; IRA-NEXT: %21 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %20, i64 %19) +; IRA-NEXT: %.obit7 = extractvalue { i64, i1 } %21, 1 +; IRA-NEXT: %.res8 = extractvalue { i64, i1 } %21, 0 +; IRA-NEXT: %22 = icmp sge i64 %.res8, -2147483648 +; IRA-NEXT: %23 = and i1 %18, %22 +; IRA-NEXT: %polly.preload.cond.overflown9 = xor i1 %.obit7, true +; IRA-NEXT: %polly.preload.cond.result10 = and i1 %23, %polly.preload.cond.overflown9 +; IRA-NEXT: br label %polly.preload.cond11 +; +; IRA: polly.preload.cond11: +; IRA-NEXT: br i1 %polly.preload.cond.result10 +; +; IRA: polly.preload.exec13: +; IRA-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 +; IRA-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 +; ; void f(int **I, int *A, int N, int p, int q) { ; for (int i = 0; i < N; i++) ; A[i] = *(I[p + q]); diff --git a/polly/test/Isl/CodeGen/no-overflow-tracking.ll b/polly/test/Isl/CodeGen/no-overflow-tracking.ll new file mode 100644 index 00000000000..3a7d7c0b3b6 --- /dev/null +++ b/polly/test/Isl/CodeGen/no-overflow-tracking.ll @@ -0,0 +1,73 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; RUN: opt %loadPolly -S -polly-codegen -polly-overflow-tracking=never < %s | FileCheck %s --check-prefix=IR +; +; As (p + q) can overflow we have to check that we load from +; I[p + q] only if it does not. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_I[p + q] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 and -2147483648 - p <= q <= 2147483647 - p } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [N, p, q] -> { Stmt_for_body[i0] -> MemRef_tmp1[0] }; +; CHECK-NEXT: Execution Context: [N, p, q] -> { : N > 0 } +; CHECK-NEXT: } +; +; IR: polly.preload.merge: +; IR-NEXT: %polly.preload.tmp1.merge = phi i32* [ %polly.access.I.load, %polly.preload.exec ], [ null, %polly.preload.cond ] +; IR-NEXT: store i32* %polly.preload.tmp1.merge, i32** %tmp1.preload.s2a +; IR-NEXT: %12 = sext i32 %N to i64 +; IR-NEXT: %13 = icmp sge i64 %12, 1 +; IR-NEXT: %14 = sext i32 %q to i64 +; IR-NEXT: %15 = sext i32 %p to i64 +; IR-NEXT: %16 = add nsw i64 %15, %14 +; IR-NEXT: %17 = icmp sle i64 %16, 2147483647 +; IR-NEXT: %18 = and i1 %13, %17 +; IR-NEXT: %19 = sext i32 %q to i64 +; IR-NEXT: %20 = sext i32 %p to i64 +; IR-NEXT: %21 = add nsw i64 %20, %19 +; IR-NEXT: %22 = icmp sge i64 %21, -2147483648 +; IR-NEXT: %23 = and i1 %18, %22 +; IR-NEXT: br label %polly.preload.cond1 +; +; IR: polly.preload.cond1: +; IR-NEXT: br i1 %23 +; +; IR: polly.preload.exec3: +; IR-NEXT: %polly.access.polly.preload.tmp1.merge = getelementptr i32, i32* %polly.preload.tmp1.merge, i64 0 +; IR-NEXT: %polly.access.polly.preload.tmp1.merge.load = load i32, i32* %polly.access.polly.preload.tmp1.merge, align 4 +; +; void f(int **I, int *A, int N, int p, int q) { +; for (int i = 0; i < N; i++) +; A[i] = *(I[p + q]); +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32** %I, i32* %A, i32 %N, i32 %p, i32 %q) { +entry: + %tmp = sext i32 %N to i64 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i64 %indvars.iv, %tmp + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add = add i32 %p, %q + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i32*, i32** %I, i64 %idxprom + %tmp1 = load i32*, i32** %arrayidx, align 8 + %tmp2 = load i32, i32* %tmp1, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + store i32 %tmp2, i32* %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |

