diff options
-rw-r--r-- | polly/lib/Support/SCEVAffinator.cpp | 39 | ||||
-rw-r--r-- | polly/lib/Support/SCEVValidator.cpp | 18 | ||||
-rw-r--r-- | polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll | 2 | ||||
-rw-r--r-- | polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll | 2 | ||||
-rw-r--r-- | polly/test/ScopInfo/multiple-types-non-affine-2.ll | 6 | ||||
-rw-r--r-- | polly/test/ScopInfo/multiple-types-non-affine.ll | 6 | ||||
-rw-r--r-- | polly/test/ScopInfo/truncate-1.ll | 45 | ||||
-rw-r--r-- | polly/test/ScopInfo/truncate-2.ll | 43 | ||||
-rw-r--r-- | polly/test/ScopInfo/zero_ext_of_truncate.ll | 53 | ||||
-rw-r--r-- | polly/test/ScopInfo/zero_ext_of_truncate_2.ll | 54 |
10 files changed, 239 insertions, 29 deletions
diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp index 8deb26e0fcc..0998584cb21 100644 --- a/polly/lib/Support/SCEVAffinator.cpp +++ b/polly/lib/Support/SCEVAffinator.cpp @@ -39,11 +39,19 @@ static int const MaxDisjunctionsInPwAff = 100; // The maximal number of bits for which a zero-extend is modeled precisely. static unsigned const MaxZextSmallBitWidth = 7; +// The maximal number of bits for which a truncate is modeled precisely. +static unsigned const MaxTruncateSmallBitWidth = 31; + /// @brief Return true if a zero-extend from @p Width bits is precisely modeled. static bool isPreciseZeroExtend(unsigned Width) { return Width <= MaxZextSmallBitWidth; } +/// @brief Return true if a truncate from @p Width bits is precisely modeled. +static bool isPreciseTruncate(unsigned Width) { + return Width <= MaxTruncateSmallBitWidth; +} + /// @brief Add the number of basic sets in @p Domain to @p User static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) { auto *NumBasicSets = static_cast<unsigned *>(User); @@ -291,7 +299,33 @@ __isl_give PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) { __isl_give PWACtx SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) { - llvm_unreachable("SCEVTruncateExpr not yet supported"); + // Truncate operations are basically modulo operations, thus we can + // model them that way. However, for large types we assume the operand + // to fit in the new type size instead of introducing a modulo with a very + // large constant. + + auto *Op = Expr->getOperand(); + auto OpPWAC = visit(Op); + + unsigned Width = TD.getTypeSizeInBits(Expr->getType()); + bool Precise = isPreciseTruncate(Width); + + if (Precise) { + OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType()); + return OpPWAC; + } + + auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first)); + auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom); + auto *GreaterDom = + isl_pw_aff_ge_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_copy(ExpPWA)); + auto *SmallerDom = + isl_pw_aff_lt_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_neg(ExpPWA)); + auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom); + OpPWAC.second = isl_set_union(OpPWAC.second, isl_set_copy(OutOfBoundsDom)); + S->recordAssumption(UNSIGNED, OutOfBoundsDom, DebugLoc(), AS_RESTRICTION, BB); + + return OpPWAC; } __isl_give PWACtx @@ -352,8 +386,7 @@ SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { auto OpPWAC = visit(Op); if (OpCanWrap) - OpPWAC.first = - addModuloSemantic(OpPWAC.first, Expr->getOperand()->getType()); + OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType()); // If the width is to big we assume the negative part does not occur. if (!Precise) { diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 838bfe9827f..844324434ce 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -136,23 +136,7 @@ public: } class ValidatorResult visitTruncateExpr(const SCEVTruncateExpr *Expr) { - ValidatorResult Op = visit(Expr->getOperand()); - - switch (Op.getType()) { - case SCEVType::INT: - case SCEVType::PARAM: - // We currently do not represent a truncate expression as an affine - // expression. If it is constant during Scop execution, we treat it as a - // parameter. - return ValidatorResult(SCEVType::PARAM, Expr); - case SCEVType::IV: - DEBUG(dbgs() << "INVALID: Truncation of SCEVType::IV expression"); - return ValidatorResult(SCEVType::INVALID); - case SCEVType::INVALID: - return Op; - } - - llvm_unreachable("Unknown SCEVType"); + return visit(Expr->getOperand()); } class ValidatorResult visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { diff --git a/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll b/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll index 5fe9a02f31d..b98846a469e 100644 --- a/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll +++ b/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll @@ -7,8 +7,6 @@ ; ; CHECK: polly.split_new_and_old: ; CHECK-NOT: = sdiv i64 0, -4 -; CHECK: %div43polly = sdiv i64 %param, 2 -; CHECK: %div44polly = sdiv i64 %div43polly, 2 ; target triple = "x86_64-unknown-linux-gnu" diff --git a/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll b/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll index 27a4134d5c1..a1285aeb5de 100644 --- a/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll +++ b/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll @@ -1,7 +1,7 @@ ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s ; RUN: opt %loadPolly -polly-codegen -analyze < %s ; -; CHECK: Execution Context: [p_0] -> { : } +; CHECK: Execution Context: [p_0_loaded_from_currpc] -> { : } ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/polly/test/ScopInfo/multiple-types-non-affine-2.ll b/polly/test/ScopInfo/multiple-types-non-affine-2.ll index 8c9ac430812..e1360fffbe3 100644 --- a/polly/test/ScopInfo/multiple-types-non-affine-2.ll +++ b/polly/test/ScopInfo/multiple-types-non-affine-2.ll @@ -24,15 +24,15 @@ ; CHECK-NEXT: Schedule := ; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 16 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 32*floor((8 + i0)/16) = o0 + 16*floor((i0)/8) and -14 + 2i0 - o0 <= 16*floor((i0)/8) <= 16 + 2i0 - o0 } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 2i0 <= o0 <= 1 + 2i0 }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 0 <= o0 <= 32 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 64*floor((8 + i0)/16) = o0 + 32*floor((i0)/8) and -28 + 4i0 - o0 <= 32*floor((i0)/8) <= 32 + 4i0 - o0 } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 4i0 <= o0 <= 3 + 4i0 }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 9 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] }; ; CHECK-NEXT: } diff --git a/polly/test/ScopInfo/multiple-types-non-affine.ll b/polly/test/ScopInfo/multiple-types-non-affine.ll index 06ce53a5978..213c5f798a4 100644 --- a/polly/test/ScopInfo/multiple-types-non-affine.ll +++ b/polly/test/ScopInfo/multiple-types-non-affine.ll @@ -24,15 +24,15 @@ ; CHECK-NEXT: Schedule := ; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 9 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[i0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : 0 <= o0 <= 9 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[i0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] -; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 15 }; +; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -7 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) } ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] }; ; CHECK-NEXT: } diff --git a/polly/test/ScopInfo/truncate-1.ll b/polly/test/ScopInfo/truncate-1.ll new file mode 100644 index 00000000000..c62de8fe9d8 --- /dev/null +++ b/polly/test/ScopInfo/truncate-1.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void f(char *A, short N) { +; for (char i = 0; i < (char)N; i++) +; A[i]++; +; } +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : 1 = 0 } +; +; CHECK: Domain := +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i8* %A, i16 signext %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i8 [ 0, %entry ], [ %inc4, %for.inc ] + %conv = sext i8 %i.0 to i32 + %conv1 = zext i16 %N to i32 + %sext = shl i32 %conv1, 24 + %conv2 = ashr exact i32 %sext, 24 + %cmp = icmp slt i32 %conv, %conv2 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = sext i8 %i.0 to i64 + %arrayidx = getelementptr inbounds i8, i8* %A, i64 %idxprom + %tmp = load i8, i8* %arrayidx, align 1 + %inc = add i8 %tmp, 1 + store i8 %inc, i8* %arrayidx, align 1 + br label %for.inc + +for.inc: ; preds = %for.body + %inc4 = add nsw i8 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/truncate-2.ll b/polly/test/ScopInfo/truncate-2.ll new file mode 100644 index 00000000000..1d924fbb64e --- /dev/null +++ b/polly/test/ScopInfo/truncate-2.ll @@ -0,0 +1,43 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void f(char *A, short N) { +; for (short i = 0; i < N; i++) +; A[(char)(N)]++; +; } +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N] -> { : 1 = 0 } +; +; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 }; +; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0] +; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i8* %A, i16 signext %N) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i16 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %cmp = icmp slt i16 %indvars.iv, %N + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %idxprom = trunc i16 %N to i8 + %arrayidx = getelementptr inbounds i8, i8* %A, i8 %idxprom + %tmp1 = load i8, i8* %arrayidx, align 1 + %inc = add i8 %tmp1, 1 + store i8 %inc, i8* %arrayidx, align 1 + br label %for.inc + +for.inc: ; preds = %for.body + %indvars.iv.next = add nuw nsw i16 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/zero_ext_of_truncate.ll b/polly/test/ScopInfo/zero_ext_of_truncate.ll new file mode 100644 index 00000000000..6556c50a76d --- /dev/null +++ b/polly/test/ScopInfo/zero_ext_of_truncate.ll @@ -0,0 +1,53 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void f(unsigned *restrict I, unsigned *restrict A, unsigned N, unsigned M) { +; for (unsigned i = 0; i < N; i++) { +; unsigned char V = *I; +; if (V < M) +; A[i]++; +; } +; } +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N, tmp, M] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) } +; +; CHECK: Domain := +; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* noalias %I, i32* noalias %A, i32 %N, i32 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp = load i32, i32* %I, align 4 + %conv1 = and i32 %tmp, 255 + %cmp2 = icmp ult i32 %conv1, %M + br i1 %cmp2, label %if.then, label %if.end + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp1 = load i32, i32* %arrayidx, align 4 + %inc = add i32 %tmp1, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} diff --git a/polly/test/ScopInfo/zero_ext_of_truncate_2.ll b/polly/test/ScopInfo/zero_ext_of_truncate_2.ll new file mode 100644 index 00000000000..a2af0cfc142 --- /dev/null +++ b/polly/test/ScopInfo/zero_ext_of_truncate_2.ll @@ -0,0 +1,54 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; void f(unsigned long *restrict I, unsigned *restrict A, unsigned N) { +; for (unsigned i = 0; i < N; i++) { +; unsigned V = *I; +; if (V < i) +; A[i]++; +; } +; } +; +; CHECK: Assumed Context: +; CHECK-NEXT: [N, tmp] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [N, tmp] -> { : N > 0 and (tmp < 0 or tmp >= 2147483648) } +; +; CHECK: Domain := +; CHECK-NEXT: [N, tmp] -> { Stmt_if_then[i0] : i0 > tmp and 0 <= i0 < N }; +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i64* noalias %I, i32* noalias %A, i32 %N, i32 %M) { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %lftr.wideiv = trunc i64 %indvars.iv to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %N + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp = load i64, i64* %I, align 8 + %conv = trunc i64 %tmp to i32 + %tmp1 = zext i32 %conv to i64 + %cmp1 = icmp ult i64 %tmp1, %indvars.iv + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %tmp2 = load i32, i32* %arrayidx, align 4 + %inc = add i32 %tmp2, 1 + store i32 %inc, i32* %arrayidx, align 4 + br label %if.end + +if.end: ; preds = %if.then, %for.body + br label %for.inc + +for.inc: ; preds = %if.end + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} |