summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohannes Doerfert <doerfert@cs.uni-saarland.de>2016-03-03 12:26:58 +0000
committerJohannes Doerfert <doerfert@cs.uni-saarland.de>2016-03-03 12:26:58 +0000
commitdf88023d2bf0e456974d4bd930c3b0c62741788c (patch)
treec062fd62a5aa1a81ba6f296af7bddb631b5c34b5
parent56de012b41fbc8ed743b7de89a0e0b8df3c373db (diff)
downloadbcm5719-llvm-df88023d2bf0e456974d4bd930c3b0c62741788c.tar.gz
bcm5719-llvm-df88023d2bf0e456974d4bd930c3b0c62741788c.zip
[FIX] Consolidation of loads with same pointer but different access relation
This should fix PR19422. Thanks to Jeremy Huddleston Sequoia for reporting this. Thanks to Roman Gareev for his investigation and the reduced test case. llvm-svn: 262612
-rw-r--r--polly/lib/Analysis/ScopInfo.cpp22
-rw-r--r--polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll114
2 files changed, 134 insertions, 2 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp
index 135c6fc00a5..ebb200aa60f 100644
--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@@ -2969,12 +2969,30 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) {
if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass))
continue;
- Consolidated = true;
+ // If the pointer and the type are equal, check if the access function wrt.
+ // the domain is equal too. It can happen that the domain fixes
+ // parameter values and these can be different for distinct parts of the
+ // SCoP. If this happens we cannot consolidate the loads but need to
+ // create a new invariant load equivalence class.
+ auto &MAs = std::get<1>(IAClass);
+ if (!MAs.empty()) {
+ auto *LastMA = MAs.front();
+
+ auto *AR = isl_map_range(MA->getAccessRelation());
+ auto *LastAR = isl_map_range(LastMA->getAccessRelation());
+ bool SameAR = isl_set_is_equal(AR, LastAR);
+ isl_set_free(AR);
+ isl_set_free(LastAR);
+
+ if (!SameAR)
+ continue;
+ }
// Add MA to the list of accesses that are in this class.
- auto &MAs = std::get<1>(IAClass);
MAs.push_front(MA);
+ Consolidated = true;
+
// Unify the execution context of the class and this statement.
isl_set *&IAClassDomainCtx = std::get<2>(IAClass);
if (IAClassDomainCtx)
diff --git a/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll
new file mode 100644
index 00000000000..b09b80a21b4
--- /dev/null
+++ b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll
@@ -0,0 +1,114 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; Check that we do not consolidate the invariant loads to smp[order - 1] and
+; smp[order - 2] in the blocks %bb1 and %bb16. While they have the same pointer
+; operand (SCEV) they do not have the same access relation due to the
+; instantiation of "order" from their domain.
+;
+; CHECK: Invariant Accesses: {
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[1] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[0] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[2] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 }
+; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[1] };
+; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 }
+; CHECK-NEXT: }
+;
+; ModuleID = '/home/johannes/Downloads/test_case.ll'
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define void @encode_residual_fixed(i32* %res, i32* %smp, i32 %n, i32 %order) {
+bb:
+ br label %.split
+
+.split: ; preds = %bb
+ switch i32 %order, label %bb32 [
+ i32 2, label %bb1
+ i32 3, label %bb16
+ ]
+
+bb1: ; preds = %.split
+ %tmp = add nsw i32 %order, -1
+ %tmp2 = sext i32 %tmp to i64
+ %tmp3 = getelementptr inbounds i32, i32* %smp, i64 %tmp2
+ %tmp4 = load i32, i32* %tmp3, align 4
+ %tmp5 = add nsw i32 %order, -2
+ %tmp6 = sext i32 %tmp5 to i64
+ %tmp7 = getelementptr inbounds i32, i32* %smp, i64 %tmp6
+ %tmp8 = load i32, i32* %tmp7, align 4
+ %tmp9 = sub nsw i32 %tmp4, %tmp8
+ %tmp10 = icmp slt i32 %order, %n
+ br i1 %tmp10, label %.lr.ph, label %.loopexit
+
+.lr.ph: ; preds = %bb1
+ %tmp11 = sext i32 %order to i64
+ br label %bb12
+
+bb12: ; preds = %bb12, %.lr.ph
+ %indvars.iv = phi i64 [ %tmp11, %.lr.ph ], [ %indvars.iv.next, %bb12 ]
+ %i.03 = phi i32 [ %order, %.lr.ph ], [ %tmp14, %bb12 ]
+ %tmp13 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ store i32 %tmp9, i32* %tmp13, align 4
+ %tmp14 = add nsw i32 %i.03, 2
+ %tmp15 = icmp slt i32 %tmp14, %n
+ %indvars.iv.next = add nsw i64 %indvars.iv, 2
+ br i1 %tmp15, label %bb12, label %..loopexit_crit_edge
+
+bb16: ; preds = %.split
+ %tmp17 = add nsw i32 %order, -1
+ %tmp18 = sext i32 %tmp17 to i64
+ %tmp19 = getelementptr inbounds i32, i32* %smp, i64 %tmp18
+ %tmp20 = load i32, i32* %tmp19, align 4
+ %tmp21 = add nsw i32 %order, -2
+ %tmp22 = sext i32 %tmp21 to i64
+ %tmp23 = getelementptr inbounds i32, i32* %smp, i64 %tmp22
+ %tmp24 = load i32, i32* %tmp23, align 4
+ %tmp25 = sub nsw i32 %tmp20, %tmp24
+ %tmp26 = icmp slt i32 %order, %n
+ br i1 %tmp26, label %.lr.ph5, label %.loopexit2
+
+.lr.ph5: ; preds = %bb16
+ %tmp27 = sext i32 %order to i64
+ br label %bb28
+
+bb28: ; preds = %bb28, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %tmp27, %.lr.ph5 ], [ %indvars.iv.next7, %bb28 ]
+ %i.14 = phi i32 [ %order, %.lr.ph5 ], [ %tmp30, %bb28 ]
+ %tmp29 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv6
+ store i32 %tmp25, i32* %tmp29, align 4
+ %tmp30 = add nsw i32 %i.14, 2
+ %tmp31 = icmp slt i32 %tmp30, %n
+ %indvars.iv.next7 = add nsw i64 %indvars.iv6, 2
+ br i1 %tmp31, label %bb28, label %..loopexit2_crit_edge
+
+..loopexit_crit_edge: ; preds = %bb12
+ br label %.loopexit
+
+.loopexit: ; preds = %..loopexit_crit_edge, %bb1
+ br label %bb32
+
+..loopexit2_crit_edge: ; preds = %bb28
+ br label %.loopexit2
+
+.loopexit2: ; preds = %..loopexit2_crit_edge, %bb16
+ br label %bb32
+
+bb32: ; preds = %.loopexit2, %.loopexit, %.split
+ %tmp33 = getelementptr inbounds i32, i32* %res, i64 2
+ %tmp34 = load i32, i32* %tmp33, align 4
+ %tmp35 = icmp eq i32 %tmp34, 5
+ br i1 %tmp35, label %bb37, label %bb36
+
+bb36: ; preds = %bb32
+ unreachable
+
+bb37: ; preds = %bb32
+ ret void
+}
OpenPOWER on IntegriCloud