diff options
| author | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-03-03 12:26:58 +0000 |
|---|---|---|
| committer | Johannes Doerfert <doerfert@cs.uni-saarland.de> | 2016-03-03 12:26:58 +0000 |
| commit | df88023d2bf0e456974d4bd930c3b0c62741788c (patch) | |
| tree | c062fd62a5aa1a81ba6f296af7bddb631b5c34b5 | |
| parent | 56de012b41fbc8ed743b7de89a0e0b8df3c373db (diff) | |
| download | bcm5719-llvm-df88023d2bf0e456974d4bd930c3b0c62741788c.tar.gz bcm5719-llvm-df88023d2bf0e456974d4bd930c3b0c62741788c.zip | |
[FIX] Consolidation of loads with same pointer but different access relation
This should fix PR19422.
Thanks to Jeremy Huddleston Sequoia for reporting this.
Thanks to Roman Gareev for his investigation and the reduced test case.
llvm-svn: 262612
| -rw-r--r-- | polly/lib/Analysis/ScopInfo.cpp | 22 | ||||
| -rw-r--r-- | polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll | 114 |
2 files changed, 134 insertions, 2 deletions
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 135c6fc00a5..ebb200aa60f 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -2969,12 +2969,30 @@ void Scop::addInvariantLoads(ScopStmt &Stmt, MemoryAccessList &InvMAs) { if (PointerSCEV != std::get<0>(IAClass) || Ty != std::get<3>(IAClass)) continue; - Consolidated = true; + // If the pointer and the type is equal check if the access function wrt. + // to the domain is equal too. It can happen that the domain fixes + // parameter values and these can be different for distinct part of the + // SCoP. If this happens we cannot consolitate the loads but need to + // create a new invariant load equivalence class. + auto &MAs = std::get<1>(IAClass); + if (!MAs.empty()) { + auto *LastMA = MAs.front(); + + auto *AR = isl_map_range(MA->getAccessRelation()); + auto *LastAR = isl_map_range(LastMA->getAccessRelation()); + bool SameAR = isl_set_is_equal(AR, LastAR); + isl_set_free(AR); + isl_set_free(LastAR); + + if (!SameAR) + continue; + } // Add MA to the list of accesses that are in this class. - auto &MAs = std::get<1>(IAClass); MAs.push_front(MA); + Consolidated = true; + // Unify the execution context of the class and this statement. isl_set *&IAClassDomainCtx = std::get<2>(IAClass); if (IAClassDomainCtx) diff --git a/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll new file mode 100644 index 00000000000..b09b80a21b4 --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_distinct_parameter_valuations.ll @@ -0,0 +1,114 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; +; Check that we do not consolidate the invariant loads to smp[order - 1] and +; smp[order - 2] in the blocks %0 and %16. While they have the same pointer +; operand (SCEV) they do not have the same access relation due to the +; instanciation of "order" from their domain. +; +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[1] }; +; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [order, n] -> { Stmt_bb1[] -> MemRef_smp[0] }; +; CHECK-NEXT: Execution Context: [order, n] -> { : order = 2 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[2] }; +; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 } +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [order, n] -> { Stmt_bb16[] -> MemRef_smp[1] }; +; CHECK-NEXT: Execution Context: [order, n] -> { : order = 3 } +; CHECK-NEXT: } +; +; ModuleID = '/home/johannes/Downloads/test_case.ll' +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @encode_residual_fixed(i32* %res, i32* %smp, i32 %n, i32 %order) { +bb: + br label %.split + +.split: ; preds = %bb + switch i32 %order, label %bb32 [ + i32 2, label %bb1 + i32 3, label %bb16 + ] + +bb1: ; preds = %.split + %tmp = add nsw i32 %order, -1 + %tmp2 = sext i32 %tmp to i64 + %tmp3 = getelementptr inbounds i32, i32* %smp, i64 %tmp2 + %tmp4 = load i32, i32* %tmp3, align 4 + %tmp5 = add nsw i32 %order, -2 + %tmp6 = sext i32 %tmp5 to i64 + %tmp7 = getelementptr inbounds i32, i32* %smp, i64 %tmp6 + %tmp8 = load i32, i32* %tmp7, align 4 + %tmp9 = sub nsw i32 %tmp4, %tmp8 + %tmp10 = icmp slt i32 %order, %n + br i1 %tmp10, label %.lr.ph, label %.loopexit + +.lr.ph: ; preds = %bb1 + %tmp11 = sext i32 %order to i64 + br label %bb12 + +bb12: ; preds = %bb12, %.lr.ph + %indvars.iv = phi i64 [ %tmp11, %.lr.ph ], [ %indvars.iv.next, %bb12 ] + %i.03 = phi i32 [ %order, %.lr.ph ], [ %tmp14, %bb12 ] + %tmp13 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv + store i32 %tmp9, i32* %tmp13, align 4 + %tmp14 = add nsw i32 %i.03, 2 + %tmp15 = icmp slt i32 %tmp14, %n + %indvars.iv.next = add nsw i64 %indvars.iv, 2 + br i1 %tmp15, label %bb12, label %..loopexit_crit_edge + +bb16: ; preds = %.split + %tmp17 = add nsw i32 %order, -1 + %tmp18 = sext i32 %tmp17 to i64 + %tmp19 = getelementptr inbounds i32, i32* %smp, i64 %tmp18 + %tmp20 = load i32, i32* %tmp19, align 4 + %tmp21 = add nsw i32 %order, -2 + %tmp22 = sext i32 %tmp21 to i64 + %tmp23 = getelementptr inbounds i32, i32* %smp, i64 %tmp22 + %tmp24 = load i32, i32* %tmp23, align 4 + %tmp25 = sub nsw i32 %tmp20, %tmp24 + %tmp26 = icmp slt i32 %order, %n + br i1 %tmp26, label %.lr.ph5, label %.loopexit2 + +.lr.ph5: ; preds = %bb16 + %tmp27 = sext i32 %order to i64 + br label %bb28 + +bb28: ; preds = %bb28, %.lr.ph5 + %indvars.iv6 = phi i64 [ %tmp27, %.lr.ph5 ], [ %indvars.iv.next7, %bb28 ] + %i.14 = phi i32 [ %order, %.lr.ph5 ], [ %tmp30, %bb28 ] + %tmp29 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv6 + store i32 %tmp25, i32* %tmp29, align 4 + %tmp30 = add nsw i32 %i.14, 2 + %tmp31 = icmp slt i32 %tmp30, %n + %indvars.iv.next7 = add nsw i64 %indvars.iv6, 2 + br i1 %tmp31, label %bb28, label %..loopexit2_crit_edge + +..loopexit_crit_edge: ; preds = %bb12 + br label %.loopexit + +.loopexit: ; preds = %..loopexit_crit_edge, %bb1 + br label %bb32 + +..loopexit2_crit_edge: ; preds = %bb28 + br label %.loopexit2 + +.loopexit2: ; preds = %..loopexit2_crit_edge, %bb16 + br label %bb32 + +bb32: ; preds = %.loopexit2, %.loopexit, %.split + %tmp33 = getelementptr inbounds i32, i32* %res, i64 2 + %tmp34 = load i32, i32* %tmp33, align 4 + %tmp35 = icmp eq i32 %tmp34, 5 + br i1 %tmp35, label %bb37, label %bb36 + +bb36: ; preds = %bb32 + unreachable + +bb37: ; preds = %bb32 + ret void +} |

