summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/Support/SCEVAffinator.cpp39
-rw-r--r--polly/lib/Support/SCEVValidator.cpp18
-rw-r--r--polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll2
-rw-r--r--polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll2
-rw-r--r--polly/test/ScopInfo/multiple-types-non-affine-2.ll6
-rw-r--r--polly/test/ScopInfo/multiple-types-non-affine.ll6
-rw-r--r--polly/test/ScopInfo/truncate-1.ll45
-rw-r--r--polly/test/ScopInfo/truncate-2.ll43
-rw-r--r--polly/test/ScopInfo/zero_ext_of_truncate.ll53
-rw-r--r--polly/test/ScopInfo/zero_ext_of_truncate_2.ll54
10 files changed, 239 insertions, 29 deletions
diff --git a/polly/lib/Support/SCEVAffinator.cpp b/polly/lib/Support/SCEVAffinator.cpp
index 8deb26e0fcc..0998584cb21 100644
--- a/polly/lib/Support/SCEVAffinator.cpp
+++ b/polly/lib/Support/SCEVAffinator.cpp
@@ -39,11 +39,19 @@ static int const MaxDisjunctionsInPwAff = 100;
// The maximal number of bits for which a zero-extend is modeled precisely.
static unsigned const MaxZextSmallBitWidth = 7;
+// The maximal number of bits for which a truncate is modeled precisely.
+static unsigned const MaxTruncateSmallBitWidth = 31;
+
/// @brief Return true if a zero-extend from @p Width bits is precisely modeled.
static bool isPreciseZeroExtend(unsigned Width) {
return Width <= MaxZextSmallBitWidth; // inclusive bound: a width equal to the limit still counts as precise
}
+/// @brief Return true if a truncate to a type of @p Width bits is precisely modeled.
+static bool isPreciseTruncate(unsigned Width) {
+ return Width <= MaxTruncateSmallBitWidth; // inclusive bound; the caller passes the width of the truncated (result) type
+}
+
/// @brief Add the number of basic sets in @p Domain to @p User
static isl_stat addNumBasicSets(isl_set *Domain, isl_aff *Aff, void *User) {
auto *NumBasicSets = static_cast<unsigned *>(User);
@@ -291,7 +299,33 @@ __isl_give PWACtx SCEVAffinator::visitConstant(const SCEVConstant *Expr) {
__isl_give PWACtx
SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- llvm_unreachable("SCEVTruncateExpr not yet supported");
+ // Truncate operations are basically modulo operations, thus we can
+ // model them that way. However, for large types we assume the operand
+ // to fit in the new type size instead of introducing a modulo with a very
+ // large constant.
+
+ auto *Op = Expr->getOperand();
+ auto OpPWAC = visit(Op); // translate the operand first; the truncate is layered on top of its result
+
+ unsigned Width = TD.getTypeSizeInBits(Expr->getType()); // bit width of the truncated (result) type
+ bool Precise = isPreciseTruncate(Width);
+
+ if (Precise) {
+ OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType()); // small result type: model the wrap-around exactly
+ return OpPWAC;
+ }
+
+ auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first)); // work on a copy so OpPWAC.first stays usable below
+ auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom); // presumably 2^(Width-1) defined on Dom; helper not visible here, confirm
+ auto *GreaterDom = // points where the operand is >= ExpPWA
+ isl_pw_aff_ge_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_copy(ExpPWA));
+ auto *SmallerDom = // points where the operand is < -ExpPWA; last use of ExpPWA, so no copy is made
+ isl_pw_aff_lt_set(isl_pw_aff_copy(OpPWAC.first), isl_pw_aff_neg(ExpPWA));
+ auto *OutOfBoundsDom = isl_set_union(SmallerDom, GreaterDom); // everything outside [-ExpPWA, ExpPWA)
+ OpPWAC.second = isl_set_union(OpPWAC.second, isl_set_copy(OutOfBoundsDom)); // add the out-of-bounds points to the set carried in .second
+ S->recordAssumption(UNSIGNED, OutOfBoundsDom, DebugLoc(), AS_RESTRICTION, BB); // and record them on the scop as a restriction
+
+ return OpPWAC;
}
__isl_give PWACtx
@@ -352,8 +386,7 @@ SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
auto OpPWAC = visit(Op);
if (OpCanWrap)
- OpPWAC.first =
- addModuloSemantic(OpPWAC.first, Expr->getOperand()->getType());
+ OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType());
// If the width is too big we assume the negative part does not occur.
if (!Precise) {
diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp
index 838bfe9827f..844324434ce 100644
--- a/polly/lib/Support/SCEVValidator.cpp
+++ b/polly/lib/Support/SCEVValidator.cpp
@@ -136,23 +136,7 @@ public:
}
class ValidatorResult visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- ValidatorResult Op = visit(Expr->getOperand());
-
- switch (Op.getType()) {
- case SCEVType::INT:
- case SCEVType::PARAM:
- // We currently do not represent a truncate expression as an affine
- // expression. If it is constant during Scop execution, we treat it as a
- // parameter.
- return ValidatorResult(SCEVType::PARAM, Expr);
- case SCEVType::IV:
- DEBUG(dbgs() << "INVALID: Truncation of SCEVType::IV expression");
- return ValidatorResult(SCEVType::INVALID);
- case SCEVType::INVALID:
- return Op;
- }
-
- llvm_unreachable("Unknown SCEVType");
+ return visit(Expr->getOperand()); // the truncate itself adds no restriction: its result is whatever the operand validates to
}
class ValidatorResult visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
diff --git a/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll b/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll
index 5fe9a02f31d..b98846a469e 100644
--- a/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll
+++ b/polly/test/Isl/CodeGen/inner_scev_sdiv_2.ll
@@ -7,8 +7,6 @@
;
; CHECK: polly.split_new_and_old:
; CHECK-NOT: = sdiv i64 0, -4
-; CHECK: %div43polly = sdiv i64 %param, 2
-; CHECK: %div44polly = sdiv i64 %div43polly, 2
;
target triple = "x86_64-unknown-linux-gnu"
diff --git a/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll b/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll
index 27a4134d5c1..a1285aeb5de 100644
--- a/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll
+++ b/polly/test/ScopInfo/invariant_load_zextended_in_own_execution_context.ll
@@ -1,7 +1,7 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
; RUN: opt %loadPolly -polly-codegen -analyze < %s
;
-; CHECK: Execution Context: [p_0] -> { : }
+; CHECK: Execution Context: [p_0_loaded_from_currpc] -> { : }
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/polly/test/ScopInfo/multiple-types-non-affine-2.ll b/polly/test/ScopInfo/multiple-types-non-affine-2.ll
index 8c9ac430812..e1360fffbe3 100644
--- a/polly/test/ScopInfo/multiple-types-non-affine-2.ll
+++ b/polly/test/ScopInfo/multiple-types-non-affine-2.ll
@@ -24,15 +24,15 @@
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 16 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 32*floor((8 + i0)/16) = o0 + 16*floor((i0)/8) and -14 + 2i0 - o0 <= 16*floor((i0)/8) <= 16 + 2i0 - o0 }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 2i0 <= o0 <= 1 + 2i0 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 0 <= o0 <= 32 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 64*floor((8 + i0)/16) = o0 + 32*floor((i0)/8) and -28 + 4i0 - o0 <= 32*floor((i0)/8) <= 32 + 4i0 - o0 }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Char[o0] : 4i0 <= o0 <= 3 + 4i0 };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] };
; CHECK-NEXT: }
diff --git a/polly/test/ScopInfo/multiple-types-non-affine.ll b/polly/test/ScopInfo/multiple-types-non-affine.ll
index 06ce53a5978..213c5f798a4 100644
--- a/polly/test/ScopInfo/multiple-types-non-affine.ll
+++ b/polly/test/ScopInfo/multiple-types-non-affine.ll
@@ -24,15 +24,15 @@
; CHECK-NEXT: Schedule :=
; CHECK-NEXT: { Stmt_bb2[i0] -> [i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Short[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : 0 <= o0 <= 9 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[o0] : -1 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Float[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
-; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : 0 <= o0 <= 15 };
+; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[o0] : -7 + o0 + 8*floor((i0)/8) <= 16*floor((8 + i0)/16) <= o0 + 8*floor((i0)/8) }
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bb2[i0] -> MemRef_Double[i0] };
; CHECK-NEXT: }
diff --git a/polly/test/ScopInfo/truncate-1.ll b/polly/test/ScopInfo/truncate-1.ll
new file mode 100644
index 00000000000..c62de8fe9d8
--- /dev/null
+++ b/polly/test/ScopInfo/truncate-1.ll
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(char *A, short N) {
+; for (char i = 0; i < (char)N; i++)
+; A[i]++;
+; }
+;
+; CHECK: Assumed Context:
+; CHECK-NEXT: [N] -> { : }
+; CHECK-NEXT: Invalid Context:
+; CHECK-NEXT: [N] -> { : 1 = 0 }
+;
+; CHECK: Domain :=
+; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i8* %A, i16 signext %N) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %i.0 = phi i8 [ 0, %entry ], [ %inc4, %for.inc ]
+ %conv = sext i8 %i.0 to i32
+ %conv1 = zext i16 %N to i32
+ %sext = shl i32 %conv1, 24 ; shift the low 8 bits of N to the top of the i32 ...
+ %conv2 = ashr exact i32 %sext, 24 ; ... and shift back arithmetically: the sign-extended low byte, i.e. (char)N from the C source
+ %cmp = icmp slt i32 %conv, %conv2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %idxprom = sext i8 %i.0 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %A, i64 %idxprom
+ %tmp = load i8, i8* %arrayidx, align 1
+ %inc = add i8 %tmp, 1
+ store i8 %inc, i8* %arrayidx, align 1
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %inc4 = add nsw i8 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/polly/test/ScopInfo/truncate-2.ll b/polly/test/ScopInfo/truncate-2.ll
new file mode 100644
index 00000000000..1d924fbb64e
--- /dev/null
+++ b/polly/test/ScopInfo/truncate-2.ll
@@ -0,0 +1,43 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(char *A, short N) {
+; for (short i = 0; i < N; i++)
+; A[(char)(N)]++;
+; }
+;
+; CHECK: Assumed Context:
+; CHECK-NEXT: [N] -> { : }
+; CHECK-NEXT: Invalid Context:
+; CHECK-NEXT: [N] -> { : 1 = 0 }
+;
+; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0]
+; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
+; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i8* %A, i16 signext %N) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i16 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %cmp = icmp slt i16 %indvars.iv, %N
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %idxprom = trunc i16 %N to i8 ; the (char)(N) subscript from the C source: an i16 -> i8 truncate of the parameter
+ %arrayidx = getelementptr inbounds i8, i8* %A, i8 %idxprom
+ %tmp1 = load i8, i8* %arrayidx, align 1
+ %inc = add i8 %tmp1, 1
+ store i8 %inc, i8* %arrayidx, align 1
+ br label %for.inc
+
+for.inc: ; preds = %for.body
+ %indvars.iv.next = add nuw nsw i16 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/polly/test/ScopInfo/zero_ext_of_truncate.ll b/polly/test/ScopInfo/zero_ext_of_truncate.ll
new file mode 100644
index 00000000000..6556c50a76d
--- /dev/null
+++ b/polly/test/ScopInfo/zero_ext_of_truncate.ll
@@ -0,0 +1,53 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(unsigned *restrict I, unsigned *restrict A, unsigned N, unsigned M) {
+; for (unsigned i = 0; i < N; i++) {
+; unsigned char V = *I;
+; if (V < M)
+; A[i]++;
+; }
+; }
+;
+; CHECK: Assumed Context:
+; CHECK-NEXT: [N, tmp, M] -> { : }
+; CHECK-NEXT: Invalid Context:
+; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
+;
+; CHECK: Domain :=
+; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias %I, i32* noalias %A, i32 %N, i32 %M) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %lftr.wideiv = trunc i64 %indvars.iv to i32 ; i64 -> i32 truncate of the widened induction variable for the exit test
+ %exitcond = icmp ne i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tmp = load i32, i32* %I, align 4
+ %conv1 = and i32 %tmp, 255 ; keeps only the low 8 bits of *I: the "unsigned char V = *I" conversion from the C source
+ %cmp2 = icmp ult i32 %conv1, %M
+ br i1 %cmp2, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp1 = load i32, i32* %arrayidx, align 4
+ %inc = add i32 %tmp1, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/polly/test/ScopInfo/zero_ext_of_truncate_2.ll b/polly/test/ScopInfo/zero_ext_of_truncate_2.ll
new file mode 100644
index 00000000000..a2af0cfc142
--- /dev/null
+++ b/polly/test/ScopInfo/zero_ext_of_truncate_2.ll
@@ -0,0 +1,54 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(unsigned long *restrict I, unsigned *restrict A, unsigned N) {
+; for (unsigned i = 0; i < N; i++) {
+; unsigned V = *I;
+; if (V < i)
+; A[i]++;
+; }
+; }
+;
+; CHECK: Assumed Context:
+; CHECK-NEXT: [N, tmp] -> { : }
+; CHECK-NEXT: Invalid Context:
+; CHECK-NEXT: [N, tmp] -> { : N > 0 and (tmp < 0 or tmp >= 2147483648) }
+;
+; CHECK: Domain :=
+; CHECK-NEXT: [N, tmp] -> { Stmt_if_then[i0] : i0 > tmp and 0 <= i0 < N };
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i64* noalias %I, i32* noalias %A, i32 %N, i32 %M) { ; %M is never referenced in the body
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %lftr.wideiv = trunc i64 %indvars.iv to i32 ; i64 -> i32 truncate of the widened induction variable for the exit test
+ %exitcond = icmp ne i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %tmp = load i64, i64* %I, align 8
+ %conv = trunc i64 %tmp to i32 ; "unsigned V = *I": truncate the loaded i64 to 32 bits ...
+ %tmp1 = zext i32 %conv to i64 ; ... then zero-extend it again for the i64 comparison against the induction variable
+ %cmp1 = icmp ult i64 %tmp1, %indvars.iv
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp2 = load i32, i32* %arrayidx, align 4
+ %inc = add i32 %tmp2, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
OpenPOWER on IntegriCloud