summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp38
-rw-r--r--polly/lib/CodeGen/IslNodeBuilder.cpp23
-rw-r--r--polly/test/Isl/CodeGen/invariant_load_complex_condition.ll70
-rw-r--r--polly/test/ScopInfo/invariant_load_complex_condition.ll73
4 files changed, 111 insertions, 93 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index a1643f8b01a..88206f04252 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -94,6 +94,12 @@ static int const MaxDisjunctsInDomain = 20;
// number of disjunct when adding non-convex sets to the context.
static int const MaxDisjunctsInContext = 4;
+// The maximal number of dimensions we allow during invariant load construction.
+// More complex access ranges will result in very high compile time and are also
+// unlikely to result in good code. This value is very high and should only
+// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
+static int const MaxDimensionsInAccessRange = 9;
+
static cl::opt<int>
OptComputeOut("polly-analysis-computeout",
cl::desc("Bound the scop analysis by a maximal amount of "
@@ -4024,6 +4030,35 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, InvariantAccessesTy &InvMAs) {
isl_set_free(DomainCtx);
}
+/// Check if an access range is too complex.
+///
+/// An access range is too complex, if it contains either many disjuncts or
+/// very complex expressions. As a simple heuristic, we assume if a set to
+/// be too complex if the sum of existentially quantified dimensions and
+/// set dimensions is larger than a threshold. This reliably detects both
+/// sets with many disjuncts as well as sets with many divisions as they
+/// arise in h264.
+///
+/// @param AccessRange The range to check for complexity.
+///
+/// @returns True if the access range is too complex.
+static bool isAccessRangeTooComplex(isl::set AccessRange) {
+ unsigned NumTotalDims = 0;
+
+ auto CountDimensions = [&NumTotalDims](isl::basic_set BSet) -> isl::stat {
+ NumTotalDims += BSet.dim(isl::dim::div);
+ NumTotalDims += BSet.dim(isl::dim::set);
+ return isl::stat::ok;
+ };
+
+ AccessRange.foreach_basic_set(CountDimensions);
+
+ if (NumTotalDims > MaxDimensionsInAccessRange)
+ return true;
+
+ return false;
+}
+
isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
// TODO: Loads that are not loop carried, hence are in a statement with
// zero iterators, are by construction invariant, though we
@@ -4072,6 +4107,9 @@ isl::set Scop::getNonHoistableCtx(MemoryAccess *Access, isl::union_map Writes) {
SafeToLoad = AccessRelation.range();
}
+ if (isAccessRangeTooComplex(AccessRelation.range()))
+ return nullptr;
+
isl::union_map Written = Writes.intersect_range(SafeToLoad);
isl::set WrittenCtx = Written.params();
bool IsWritten = !WrittenCtx.is_empty();
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index db23319183a..7783818e17b 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -53,12 +53,6 @@ using namespace llvm;
STATISTIC(VersionedScops, "Number of SCoPs that required versioning.");
-// The maximal number of dimensions we allow during invariant load construction.
-// More complex access ranges will result in very high compile time and are also
-// unlikely to result in good code. This value is very high and should only
-// trigger for corner cases (e.g., the "dct_luma" function in h264, SPEC2006).
-static int const MaxDimensionsInAccessRange = 9;
-
static cl::opt<bool> PollyGenerateRTCPrint(
"polly-codegen-emit-rtc-print",
cl::desc("Emit code that prints the runtime check result dynamically."),
@@ -1134,26 +1128,9 @@ bool IslNodeBuilder::materializeFortranArrayOutermostDimension() {
return true;
}
-/// Add the number of dimensions in @p BS to @p U.
-static isl_stat countTotalDims(__isl_take isl_basic_set *BS, void *U) {
- unsigned *NumTotalDim = static_cast<unsigned *>(U);
- *NumTotalDim += isl_basic_set_total_dim(BS);
- isl_basic_set_free(BS);
- return isl_stat_ok;
-}
-
Value *IslNodeBuilder::preloadUnconditionally(isl_set *AccessRange,
isl_ast_build *Build,
Instruction *AccInst) {
-
- // TODO: This check could be performed in the ScopInfo already.
- unsigned NumTotalDim = 0;
- isl_set_foreach_basic_set(AccessRange, countTotalDims, &NumTotalDim);
- if (NumTotalDim > MaxDimensionsInAccessRange) {
- isl_set_free(AccessRange);
- return nullptr;
- }
-
isl_pw_multi_aff *PWAccRel = isl_pw_multi_aff_from_set(AccessRange);
isl_ast_expr *Access =
isl_ast_build_access_from_pw_multi_aff(Build, PWAccRel);
diff --git a/polly/test/Isl/CodeGen/invariant_load_complex_condition.ll b/polly/test/Isl/CodeGen/invariant_load_complex_condition.ll
deleted file mode 100644
index 66ad94f8e8c..00000000000
--- a/polly/test/Isl/CodeGen/invariant_load_complex_condition.ll
+++ /dev/null
@@ -1,70 +0,0 @@
-; RUN: opt %loadPolly -S -polly-codegen -polly-invariant-load-hoisting=true < %s | FileCheck %s
-;
-; Extracted from h246 in SPEC 2006.
-;
-; TODO: We check that we do compile this benchmark in reasonable time.
-; To do so we currently bail out due to the complex access range
-; (multiple modulos) of the invariant load.
-;
-; FIXME: We should not bail with a false RTC here.
-;
-; CHECK-LABEL: polly.preload.begin:
-; CHECK-NOT: br i1
-; CHECK-NOT: br label
-; CHECK: br i1 false, label %polly.start, label %entry.split
-;
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-%struct.IP = type { i32****, i32***, %struct.P, %struct.S, %struct.m }
-%struct.P = type { i32 }
-%struct.S = type { i32 }
-%struct.D = type { i32 }
-%struct.B = type { i32 }
-%struct.E = type { i32 }
-%struct.s = type { i32 }
-%struct.M = type { i32 }
-%struct.C = type { i32 }
-%struct.T = type { i32 }
-%struct.R = type { i32 }
-%struct.m = type { i32 }
-%struct.d = type { i32 }
-
-@img = external global %struct.IP*, align 8
-
-; Function Attrs: nounwind uwtable
-define void @dct_luma(i32 %block_x, i32 %block_y) #0 {
-entry:
- br label %entry.split
-
-entry.split: ; preds = %entry
- %div = sdiv i32 %block_x, 4
- %div1 = sdiv i32 %block_y, 4
- %rem = srem i32 %div1, 2
- %mul4 = shl nsw i32 %rem, 1
- %rem5 = srem i32 %div, 2
- %add6 = add nsw i32 %mul4, %rem5
- %idxprom = sext i32 %add6 to i64
- %0 = load %struct.IP*, %struct.IP** @img, align 8
- %cofAC = getelementptr inbounds %struct.IP, %struct.IP* %0, i32 0, i32 0
- %1 = load i32****, i32***** %cofAC, align 8
- %arrayidx = getelementptr inbounds i32***, i32**** %1, i64 0
- %2 = load i32***, i32**** %arrayidx, align 8
- %arrayidx8 = getelementptr inbounds i32**, i32*** %2, i64 %idxprom
- %3 = load i32**, i32*** %arrayidx8, align 8
- %mb_data = getelementptr inbounds %struct.IP, %struct.IP* %0, i64 0, i32 4
- %4 = load %struct.m, %struct.m* %mb_data, align 8
- br i1 false, label %land.rhs, label %land.end
-
-land.rhs: ; preds = %entry.split
- br label %land.end
-
-land.end: ; preds = %land.rhs, %entry.split
- %5 = phi i1 [ false, %entry.split ], [ undef, %land.rhs ]
- br i1 %5, label %for.cond104.preheader, label %for.cond34.preheader
-
-for.cond34.preheader: ; preds = %land.end
- ret void
-
-for.cond104.preheader: ; preds = %land.end
- ret void
-}
diff --git a/polly/test/ScopInfo/invariant_load_complex_condition.ll b/polly/test/ScopInfo/invariant_load_complex_condition.ll
new file mode 100644
index 00000000000..f5456564a8b
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_load_complex_condition.ll
@@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -S -polly-scops -analyze \
+; RUN: -polly-invariant-load-hoisting=true < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.IP = type { i32****, i32***, %struct.P, %struct.S, %struct.m }
+%struct.P = type { i32 }
+%struct.S = type { i32 }
+%struct.D = type { i32 }
+%struct.B = type { i32 }
+%struct.E = type { i32 }
+%struct.s = type { i32 }
+%struct.M = type { i32 }
+%struct.C = type { i32 }
+%struct.T = type { i32 }
+%struct.R = type { i32 }
+%struct.m = type { i32 }
+%struct.d = type { i32 }
+
+
+; Verify that we do not invariant load hoist very complex conditions.
+
+; CHECK: Statements {
+; CHECK-NEXT: Stmt_entry_split
+; CHECK-NEXT: Domain :=
+; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] };
+; CHECK-NEXT: Schedule :=
+; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] -> [] };
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] -> MemRef4[o0] : (-3 <= block_y < 0 and block_x <= -4 and -8 + block_x - 4o0 <= 8*floor((-1 + block_x)/8) <= -5 + block_x - 4o0) or (-3 <= block_y < 0 and block_x >= 0 and -3 + block_x - 4o0 <= 8*floor((block_x)/8) <= block_x - 4o0) or (block_y <= -4 and block_x <= -4 and -16 + block_x - 4o0 - 8*floor((-1 + block_x)/8) + 8*floor((-1 + block_y)/4) <= 16*floor((-1 + block_y)/8) <= -13 + block_x - 4o0 - 8*floor((-1 + block_x)/8) + 8*floor((-1 + block_y)/4)) or (block_y <= -4 and block_x >= 0 and -11 + block_x - 4o0 - 8*floor((block_x)/8) + 8*floor((-1 + block_y)/4) <= 16*floor((-1 + block_y)/8) <= -8 + block_x - 4o0 - 8*floor((block_x)/8) + 8*floor((-1 + block_y)/4)) or (block_y >= 0 and block_x <= -4 and -8 + block_x - 4o0 - 8*floor((-1 + block_x)/8) + 8*floor((block_y)/4) <= 16*floor((block_y)/8) <= -5 + block_x - 4o0 - 8*floor((-1 + block_x)/8) + 8*floor((block_y)/4)) or (block_y >= 0 and block_x >= 0 and -3 + block_x - 4o0 - 8*floor((block_x)/8) + 8*floor((block_y)/4) <= 16*floor((block_y)/8) <= block_x - 4o0 - 8*floor((block_x)/8) + 8*floor((block_y)/4)) or (4*floor((block_y)/8) = -o0 + 2*floor((block_y)/4) and block_y >= 0 and -3 <= block_x < 0 and 4*floor((block_y)/4) >= -7 + block_y + 2o0 and 4*floor((block_y)/4) <= block_y + 2o0) or (4*floor((-1 + block_y)/8) = -2 - o0 + 2*floor((-1 + block_y)/4) and block_y <= -4 and -3 <= block_x < 0 and 4*floor((-1 + block_y)/4) >= -4 + block_y + 2o0 and 4*floor((-1 + block_y)/4) <= 3 + block_y + 2o0); Stmt_entry_split[] -> MemRef4[0] : -3 <= block_y < 0 and -3 <= block_x < 0 };
+; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
+; CHECK-NEXT: [block_y, block_x] -> { Stmt_entry_split[] -> MemRef0[] };
+; CHECK-NEXT: }
+
+@img = external global %struct.IP*, align 8
+
+; Function Attrs: nounwind uwtable
+define void @dct_luma(i32 %block_x, i32 %block_y) #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %div = sdiv i32 %block_x, 4
+ %div1 = sdiv i32 %block_y, 4
+ %rem = srem i32 %div1, 2
+ %mul4 = shl nsw i32 %rem, 1
+ %rem5 = srem i32 %div, 2
+ %add6 = add nsw i32 %mul4, %rem5
+ %idxprom = sext i32 %add6 to i64
+ %0 = load %struct.IP*, %struct.IP** @img, align 8
+ %cofAC = getelementptr inbounds %struct.IP, %struct.IP* %0, i32 0, i32 0
+ %1 = load i32****, i32***** %cofAC, align 8
+ %arrayidx = getelementptr inbounds i32***, i32**** %1, i64 0
+ %2 = load i32***, i32**** %arrayidx, align 8
+ %arrayidx8 = getelementptr inbounds i32**, i32*** %2, i64 %idxprom
+ %3 = load i32**, i32*** %arrayidx8, align 8
+ %mb_data = getelementptr inbounds %struct.IP, %struct.IP* %0, i64 0, i32 4
+ %4 = load %struct.m, %struct.m* %mb_data, align 8
+ br i1 false, label %land.rhs, label %land.end
+
+land.rhs: ; preds = %entry.split
+ br label %land.end
+
+land.end: ; preds = %land.rhs, %entry.split
+ %5 = phi i1 [ false, %entry.split ], [ undef, %land.rhs ]
+ br i1 %5, label %for.cond104.preheader, label %for.cond34.preheader
+
+for.cond34.preheader: ; preds = %land.end
+ ret void
+
+for.cond104.preheader: ; preds = %land.end
+ ret void
+}
OpenPOWER on IntegriCloud