summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp22
-rw-r--r--polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll114
2 files changed, 134 insertions, 2 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 135c6fc00a5..ebb200aa60f 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -2969,12 +2969,30 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) {
if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass))
continue;
- Consolidated = true;
+ // If the pointer and the type is equal check if the access function wrt.
+ // to the domain is equal too. It can happen that the domain fixes
+ // parameter values and these can be different for distinct part of the
+ // SCoP. If this happens we cannot consolitate the loads but need to
+ // create a new invariant load equivalence class.
+ auto &MAs = std::get<1>(IAClass);
+ if (!MAs.empty()) {
+ auto *LastMA = MAs.front();
+
+ auto *AR = isl_map_range(MA->getAccessRelation());
+ auto *LastAR = isl_map_range(LastMA->getAccessRelation());
+ bool SameAR = isl_set_is_equal(AR, LastAR);
+ isl_set_free(AR);
+ isl_set_free(LastAR);
+
+ if (!SameAR)
+ continue;
+ }
// Add MA to the list of accesses that are in this class.
- auto &MAs = std::get<1>(IAClass);
MAs.push_front(MA);
+ Consolidated = true;
+
// Unify the execution context of the class and this statement.
isl_set *&IAClassDomainCtx = std::get<2>(IAClass);
if (IAClassDomainCtx)
diff --git a/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll
new file mode 100644
index 00000000000..b09b80a21b4
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll
@@ -0,0 +1,114 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Check that we do not consolidate the invariant loads to smp[order - 1] and
+; smp[order - 2] in the blocks %0 and %16. While they have the same pointer
+; operand (SCEV) they do not have the same access relation due to the
+; instanciation of "order" from their domain.
+;
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[1] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[0] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[2] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[1] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 }
+; CHECK-NEXT: }
+;
+; ModuleID = '/home/johannes/Downloads/test_case.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @encode_residual_fixed(i32* %res, i32* %smp, i32 %n, i32 %order) {
+bb:
+ br label %.split
+
+.split: ; preds = %bb
+ switch i32 %order, label %bb32 [
+ i32 2, label %bb1
+ i32 3, label %bb16
+ ]
+
+bb1: ; preds = %.split
+ %tmp = add nsw i32 %order, -1
+ %tmp2 = sext i32 %tmp to i64
+ %tmp3 = getelementptr inbounds i32, i32* %smp, i64 %tmp2
+ %tmp4 = load i32, i32* %tmp3, align 4
+ %tmp5 = add nsw i32 %order, -2
+ %tmp6 = sext i32 %tmp5 to i64
+ %tmp7 = getelementptr inbounds i32, i32* %smp, i64 %tmp6
+ %tmp8 = load i32, i32* %tmp7, align 4
+ %tmp9 = sub nsw i32 %tmp4, %tmp8
+ %tmp10 = icmp slt i32 %order, %n
+ br i1 %tmp10, label %.lr.ph, label %.loopexit
+
+.lr.ph: ; preds = %bb1
+ %tmp11 = sext i32 %order to i64
+ br label %bb12
+
+bb12: ; preds = %bb12, %.lr.ph
+ %indvars.iv = phi i64 [ %tmp11, %.lr.ph ], [ %indvars.iv.next, %bb12 ]
+ %i.03 = phi i32 [ %order, %.lr.ph ], [ %tmp14, %bb12 ]
+ %tmp13 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ store i32 %tmp9, i32* %tmp13, align 4
+ %tmp14 = add nsw i32 %i.03, 2
+ %tmp15 = icmp slt i32 %tmp14, %n
+ %indvars.iv.next = add nsw i64 %indvars.iv, 2
+ br i1 %tmp15, label %bb12, label %..loopexit_crit_edge
+
+bb16: ; preds = %.split
+ %tmp17 = add nsw i32 %order, -1
+ %tmp18 = sext i32 %tmp17 to i64
+ %tmp19 = getelementptr inbounds i32, i32* %smp, i64 %tmp18
+ %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp21 = add nsw i32 %order, -2
+ %tmp22 = sext i32 %tmp21 to i64
+ %tmp23 = getelementptr inbounds i32, i32* %smp, i64 %tmp22
+ %tmp24 = load i32, i32* %tmp23, align 4
+ %tmp25 = sub nsw i32 %tmp20, %tmp24
+ %tmp26 = icmp slt i32 %order, %n
+ br i1 %tmp26, label %.lr.ph5, label %.loopexit2
+
+.lr.ph5: ; preds = %bb16
+ %tmp27 = sext i32 %order to i64
+ br label %bb28
+
+bb28: ; preds = %bb28, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %tmp27, %.lr.ph5 ], [ %indvars.iv.next7, %bb28 ]
+ %i.14 = phi i32 [ %order, %.lr.ph5 ], [ %tmp30, %bb28 ]
+ %tmp29 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv6
+ store i32 %tmp25, i32* %tmp29, align 4
+ %tmp30 = add nsw i32 %i.14, 2
+ %tmp31 = icmp slt i32 %tmp30, %n
+ %indvars.iv.next7 = add nsw i64 %indvars.iv6, 2
+ br i1 %tmp31, label %bb28, label %..loopexit2_crit_edge
+
+..loopexit_crit_edge: ; preds = %bb12
+ br label %.loopexit
+
+.loopexit: ; preds = %..loopexit_crit_edge, %bb1
+ br label %bb32
+
+..loopexit2_crit_edge: ; preds = %bb28
+ br label %.loopexit2
+
+.loopexit2: ; preds = %..loopexit2_crit_edge, %bb16
+ br label %bb32
+
+bb32: ; preds = %.loopexit2, %.loopexit, %.split
+ %tmp33 = getelementptr inbounds i32, i32* %res, i64 2
+ %tmp34 = load i32, i32* %tmp33, align 4
+ %tmp35 = icmp eq i32 %tmp34, 5
+ br i1 %tmp35, label %bb37, label %bb36
+
+bb36: ; preds = %bb32
+ unreachable
+
+bb37: ; preds = %bb32
+ ret void
+}
OpenPOWER on IntegriCloud