diff options
| author | Michael Kruse <llvm@meinersbur.de> | 2017-09-20 11:53:05 +0000 |
|---|---|---|
| committer | Michael Kruse <llvm@meinersbur.de> | 2017-09-20 11:53:05 +0000 |
| commit | 8dceb7606663d47c8a76acf0bec23a46a191b31c (patch) | |
| tree | cc19b5c0d63c41238507ecab4cd9345e5014ba85 | |
| parent | 83ca8a2b717c7403fdc3e61c42ad93d43396d892 (diff) | |
| download | bcm5719-llvm-8dceb7606663d47c8a76acf0bec23a46a191b31c.tar.gz bcm5719-llvm-8dceb7606663d47c8a76acf0bec23a46a191b31c.zip | |
[ScheduleOptimizer] Fix and test schedule tree statistics.
Fix walking over the schedule tree to collect its properties
(number of permutable bands, etc.).
Also add regression tests for these statistics.
llvm-svn: 313750
| -rw-r--r-- | polly/lib/Transform/ScheduleOptimizer.cpp | 70 | ||||
| -rw-r--r-- | polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll | 2 | ||||
| -rw-r--r-- | polly/test/ScheduleOptimizer/statistics.ll | 279 |
3 files changed, 319 insertions, 32 deletions
diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index c81f1875382..0a2fe8b6897 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -1431,37 +1431,43 @@ static void walkScheduleTreeForStatistics(isl::schedule Schedule, int Version) { if (!Root) return; - Root.foreach_ancestor_top_down([Version]( - isl::schedule_node Node) -> isl::stat { - switch (isl_schedule_node_get_type(Node.get())) { - case isl_schedule_node_band: { - NumBands[Version]++; - if (isl_schedule_node_band_get_permutable(Node.get()) == isl_bool_true) - NumPermutable[Version]++; - - int CountMembers = isl_schedule_node_band_n_member(Node.get()); - NumBandMembers[Version] += CountMembers; - for (int i = 0; i < CountMembers; i += 1) { - if (Node.band_member_get_coincident(i)) - NumCoincident[Version]++; - } - break; - } - - case isl_schedule_node_filter: - NumFilters[Version]++; - break; - - case isl_schedule_node_extension: - NumExtension[Version]++; - break; - - default: - break; - } - - return isl::stat::ok; - }); + isl_schedule_node_foreach_descendant_top_down( + Root.get(), + [](__isl_keep isl_schedule_node *nodeptr, void *user) -> isl_bool { + isl::schedule_node Node = isl::manage(isl_schedule_node_copy(nodeptr)); + int Version = *static_cast<int *>(user); + + switch (isl_schedule_node_get_type(Node.get())) { + case isl_schedule_node_band: { + NumBands[Version]++; + if (isl_schedule_node_band_get_permutable(Node.get()) == + isl_bool_true) + NumPermutable[Version]++; + + int CountMembers = isl_schedule_node_band_n_member(Node.get()); + NumBandMembers[Version] += CountMembers; + for (int i = 0; i < CountMembers; i += 1) { + if (Node.band_member_get_coincident(i)) + NumCoincident[Version]++; + } + break; + } + + case isl_schedule_node_filter: + NumFilters[Version]++; + break; + + case isl_schedule_node_extension: + NumExtension[Version]++; + break; + + default: + break; + } + + return 
isl_bool_true; + }, + &Version); } bool IslScheduleOptimizer::runOnScop(Scop &S) { @@ -1613,7 +1619,7 @@ bool IslScheduleOptimizer::runOnScop(Scop &S) { auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); const OptimizerAdditionalInfoTy OAI = {TTI, const_cast<Dependences *>(&D)}; auto NewSchedule = ScheduleTreeOptimizer::optimizeSchedule(Schedule, &OAI); - walkScheduleTreeForStatistics(NewSchedule, 1); + walkScheduleTreeForStatistics(NewSchedule, 2); if (!ScheduleTreeOptimizer::isProfitableSchedule(S, NewSchedule)) return false; diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll index 179b612f361..a7d77050ff2 100644 --- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll +++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts.ll @@ -1,6 +1,7 @@ ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=false \ ; RUN: -debug < %s 2>&1| FileCheck %s ; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -debug < %s 2>&1| FileCheck %s --check-prefix=PATTERN-MATCHING-OPTS +; RUN: opt %loadPolly -polly-opt-isl -polly-pattern-matching-based-opts=true -stats -disable-output < %s 2>&1| FileCheck %s --check-prefix=STATS -match-full-lines ; REQUIRES: asserts ; ; /* C := alpha*A*B + beta*C */ @@ -14,6 +15,7 @@ ; ; CHECK-NOT: The matrix multiplication pattern was detected ; PATTERN-MATCHING-OPTS: The matrix multiplication pattern was detected +; STATS: 1 polly-opt-isl - Number of matrix multiplication patterns detected and optimized ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" diff --git a/polly/test/ScheduleOptimizer/statistics.ll b/polly/test/ScheduleOptimizer/statistics.ll new file mode 100644 index 00000000000..086d96847fa --- /dev/null +++ b/polly/test/ScheduleOptimizer/statistics.ll @@ -0,0 +1,279 @@ +; RUN: opt %loadPolly -polly-opt-isl -stats -disable-output < 
%s 2>&1 | FileCheck %s -match-full-lines + +; REQUIRES: asserts + +; void foo_1d(float *A) { +; for (long i = 0; i < 1024; i++) +; A[i] += i; +; } +; +; void foo_2d(float *A) { +; for (long i = 0; i < 1024; i++) +; for (long j = 0; j < 1024; j++) +; A[i + j] += i + j; +; } +; +; void foo_3d(float *A) { +; for (long i = 0; i < 1024; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 1024; k++) +; A[i + j + k] += i + j + k; +; } +; +; void foo_4d(float *A) { +; for (long i = 0; i < 1024; i++) +; for (long j = 0; j < 1024; j++) +; for (long k = 0; k < 1024; k++) +; for (long l = 0; l < 1024; l++) +; A[i + j + k + l] += i + j + k + l; +; } +; +; void foo_zero_iterations(float *S) { +; for (long i = 0; i < 0; i++) +; A[i] += i; +; } +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @foo_1d(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 1024 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = sitofp i64 %i.0 to float + %tmp3 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, %tmp + store float %tmp5, float* %tmp3, align 4 + br label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = add nuw nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} + +define void @foo_2d(float* %A) { +bb: + br label %bb2 + +bb2: ; preds = %bb14, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp15, %bb14 ] + %exitcond1 = icmp ne i64 %i.0, 1024 + br i1 %exitcond1, label %bb3, label %bb16 + +bb3: ; preds = %bb2 + br label %bb4 + +bb4: ; preds = %bb11, %bb3 + %j.0 = phi i64 [ 0, %bb3 ], [ %tmp12, %bb11 ] + %exitcond = icmp ne i64 %j.0, 1024 + br i1 %exitcond, label %bb5, label %bb13 + +bb5: ; preds = %bb4 + %tmp = add nuw nsw i64 %i.0, %j.0 + %tmp6 = sitofp i64 %tmp to float + %tmp7 = add nuw nsw i64 %i.0, %j.0 + %tmp8 = getelementptr inbounds float, float* %A, i64 %tmp7 + %tmp9 = 
load float, float* %tmp8, align 4 + %tmp10 = fadd float %tmp9, %tmp6 + store float %tmp10, float* %tmp8, align 4 + br label %bb11 + +bb11: ; preds = %bb5 + %tmp12 = add nuw nsw i64 %j.0, 1 + br label %bb4 + +bb13: ; preds = %bb4 + br label %bb14 + +bb14: ; preds = %bb13 + %tmp15 = add nuw nsw i64 %i.0, 1 + br label %bb2 + +bb16: ; preds = %bb2 + ret void +} + +define void @foo_3d(float* %A) { +bb: + br label %bb3 + +bb3: ; preds = %bb22, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp23, %bb22 ] + %exitcond2 = icmp ne i64 %i.0, 1024 + br i1 %exitcond2, label %bb4, label %bb24 + +bb4: ; preds = %bb3 + br label %bb5 + +bb5: ; preds = %bb19, %bb4 + %j.0 = phi i64 [ 0, %bb4 ], [ %tmp20, %bb19 ] + %exitcond1 = icmp ne i64 %j.0, 1024 + br i1 %exitcond1, label %bb6, label %bb21 + +bb6: ; preds = %bb5 + br label %bb7 + +bb7: ; preds = %bb16, %bb6 + %k.0 = phi i64 [ 0, %bb6 ], [ %tmp17, %bb16 ] + %exitcond = icmp ne i64 %k.0, 1024 + br i1 %exitcond, label %bb8, label %bb18 + +bb8: ; preds = %bb7 + %tmp = add nuw nsw i64 %i.0, %j.0 + %tmp9 = add nuw nsw i64 %tmp, %k.0 + %tmp10 = sitofp i64 %tmp9 to float + %tmp11 = add nuw nsw i64 %i.0, %j.0 + %tmp12 = add nuw nsw i64 %tmp11, %k.0 + %tmp13 = getelementptr inbounds float, float* %A, i64 %tmp12 + %tmp14 = load float, float* %tmp13, align 4 + %tmp15 = fadd float %tmp14, %tmp10 + store float %tmp15, float* %tmp13, align 4 + br label %bb16 + +bb16: ; preds = %bb8 + %tmp17 = add nuw nsw i64 %k.0, 1 + br label %bb7 + +bb18: ; preds = %bb7 + br label %bb19 + +bb19: ; preds = %bb18 + %tmp20 = add nuw nsw i64 %j.0, 1 + br label %bb5 + +bb21: ; preds = %bb5 + br label %bb22 + +bb22: ; preds = %bb21 + %tmp23 = add nuw nsw i64 %i.0, 1 + br label %bb3 + +bb24: ; preds = %bb3 + ret void +} + +define void @foo_4d(float* %A) { +bb: + br label %bb4 + +bb4: ; preds = %bb30, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp31, %bb30 ] + %exitcond3 = icmp ne i64 %i.0, 1024 + br i1 %exitcond3, label %bb5, label %bb32 + +bb5: ; preds = %bb4 + br label %bb6 + +bb6: 
; preds = %bb27, %bb5 + %j.0 = phi i64 [ 0, %bb5 ], [ %tmp28, %bb27 ] + %exitcond2 = icmp ne i64 %j.0, 1024 + br i1 %exitcond2, label %bb7, label %bb29 + +bb7: ; preds = %bb6 + br label %bb8 + +bb8: ; preds = %bb24, %bb7 + %k.0 = phi i64 [ 0, %bb7 ], [ %tmp25, %bb24 ] + %exitcond1 = icmp ne i64 %k.0, 1024 + br i1 %exitcond1, label %bb9, label %bb26 + +bb9: ; preds = %bb8 + br label %bb10 + +bb10: ; preds = %bb21, %bb9 + %l.0 = phi i64 [ 0, %bb9 ], [ %tmp22, %bb21 ] + %exitcond = icmp ne i64 %l.0, 1024 + br i1 %exitcond, label %bb11, label %bb23 + +bb11: ; preds = %bb10 + %tmp = add nuw nsw i64 %i.0, %j.0 + %tmp12 = add nuw nsw i64 %tmp, %k.0 + %tmp13 = add nuw nsw i64 %tmp12, %l.0 + %tmp14 = sitofp i64 %tmp13 to float + %tmp15 = add nuw nsw i64 %i.0, %j.0 + %tmp16 = add nuw nsw i64 %tmp15, %k.0 + %tmp17 = add nuw nsw i64 %tmp16, %l.0 + %tmp18 = getelementptr inbounds float, float* %A, i64 %tmp17 + %tmp19 = load float, float* %tmp18, align 4 + %tmp20 = fadd float %tmp19, %tmp14 + store float %tmp20, float* %tmp18, align 4 + br label %bb21 + +bb21: ; preds = %bb11 + %tmp22 = add nuw nsw i64 %l.0, 1 + br label %bb10 + +bb23: ; preds = %bb10 + br label %bb24 + +bb24: ; preds = %bb23 + %tmp25 = add nuw nsw i64 %k.0, 1 + br label %bb8 + +bb26: ; preds = %bb8 + br label %bb27 + +bb27: ; preds = %bb26 + %tmp28 = add nuw nsw i64 %j.0, 1 + br label %bb6 + +bb29: ; preds = %bb6 + br label %bb30 + +bb30: ; preds = %bb29 + %tmp31 = add nuw nsw i64 %i.0, 1 + br label %bb4 + +bb32: ; preds = %bb4 + ret void +} + +define void @foo_zero_iterations(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb6, %bb + %i.0 = phi i64 [ 0, %bb ], [ %tmp7, %bb6 ] + %exitcond = icmp ne i64 %i.0, 0 + br i1 %exitcond, label %bb2, label %bb8 + +bb2: ; preds = %bb1 + %tmp = sitofp i64 %i.0 to float + %tmp3 = getelementptr inbounds float, float* %A, i64 %i.0 + %tmp4 = load float, float* %tmp3, align 4 + %tmp5 = fadd float %tmp4, %tmp + store float %tmp5, float* %tmp3, align 4 + br label %bb6 + 
+bb6: ; preds = %bb2 + %tmp7 = add nuw nsw i64 %i.0, 1 + br label %bb1 + +bb8: ; preds = %bb1 + ret void +} + + +; CHECK-DAG: 1 polly-opt-isl - Number of first level tiling applied +; CHECK-DAG: 9 polly-opt-isl - Number of affine loops optimized +; CHECK-DAG: 10 polly-opt-isl - Number of band members (original) +; CHECK-DAG: 10 polly-opt-isl - Number of band members (after scheduler) +; CHECK-DAG: 12 polly-opt-isl - Number of band members (after optimizer) +; CHECK-DAG: 10 polly-opt-isl - Number of bands (original) +; CHECK-DAG: 7 polly-opt-isl - Number of bands (after scheduler) +; CHECK-DAG: 8 polly-opt-isl - Number of bands (after optimizer) +; CHECK-DAG: 4 polly-opt-isl - Number of coincident band members (after scheduler) +; CHECK-DAG: 5 polly-opt-isl - Number of coincident band members (after optimizer) +; CHECK-DAG: 7 polly-opt-isl - Number of permutable bands (after scheduler) +; CHECK-DAG: 8 polly-opt-isl - Number of permutable bands (after optimizer) +; CHECK-DAG: 3 polly-opt-isl - Number of scops optimized +; CHECK-DAG: 4 polly-opt-isl - Number of scops processed +; CHECK-DAG: 4 polly-opt-isl - Number of scops rescheduled |

