summary | refs | log | tree | commit | diff | stats
path: root/openmp/runtime/src/kmp_runtime.cpp
diff options
context:
space:
mode:
author Paul Osmialowski <pawel.osmialowski@arm.com> 2017-08-10 23:04:11 +0000
committer Paul Osmialowski <pawel.osmialowski@arm.com> 2017-08-10 23:04:11 +0000
commit a016279422701f1c4ad4fab029356b44160ac14d (patch)
tree b32473d2daf9432771a55dbc3da5895d73d2b380 /openmp/runtime/src/kmp_runtime.cpp
parent b21739f9881d8860a53ce55168f1c47163b16bec (diff)
download bcm5719-llvm-a016279422701f1c4ad4fab029356b44160ac14d.tar.gz
download bcm5719-llvm-a016279422701f1c4ad4fab029356b44160ac14d.zip
OMP_PROC_BIND: better spread
This change improves the way threads are spread across cores when OMP_PROC_BIND=spread is set and no unusual affinity masks are in use.

Differential Revision: https://reviews.llvm.org/D36510

llvm-svn: 310670
Diffstat (limited to 'openmp/runtime/src/kmp_runtime.cpp')
-rw-r--r-- openmp/runtime/src/kmp_runtime.cpp 150
1 file changed, 108 insertions, 42 deletions
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 0736ca25c40..c993be4192d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -4629,23 +4629,50 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
}
if (n_th <= n_places) {
- int place = masters_place;
- int S = n_places / n_th;
- int s_count, rem, gap, gap_ct;
- rem = n_places - n_th * S;
- gap = rem ? n_th / rem : 1;
- gap_ct = gap;
- thidx = n_th;
- if (update_master_only == 1)
- thidx = 1;
- for (f = 0; f < thidx; f++) {
- kmp_info_t *th = team->t.t_threads[f];
- KMP_DEBUG_ASSERT(th != NULL);
+ int place = -1;
+
+ if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
+ int S = n_places / n_th;
+ int s_count, rem, gap, gap_ct;
+
+ place = masters_place;
+ rem = n_places - n_th * S;
+ gap = rem ? n_th / rem : 1;
+ gap_ct = gap;
+ thidx = n_th;
+ if (update_master_only == 1)
+ thidx = 1;
+ for (f = 0; f < thidx; f++) {
+ kmp_info_t *th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT(th != NULL);
+
+ th->th.th_first_place = place;
+ th->th.th_new_place = place;
+ s_count = 1;
+ while (s_count < S) {
+ if (place == last_place) {
+ place = first_place;
+ } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ place = 0;
+ } else {
+ place++;
+ }
+ s_count++;
+ }
+ if (rem && (gap_ct == gap)) {
+ if (place == last_place) {
+ place = first_place;
+ } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ place = 0;
+ } else {
+ place++;
+ }
+ rem--;
+ gap_ct = 0;
+ }
+ th->th.th_last_place = place;
+ gap_ct++;
- th->th.th_first_place = place;
- th->th.th_new_place = place;
- s_count = 1;
- while (s_count < S) {
if (place == last_place) {
place = first_place;
} else if (place == (int)(__kmp_affinity_num_masks - 1)) {
@@ -4653,35 +4680,74 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else {
place++;
}
- s_count++;
+
+ KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
+ "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
+ __kmp_gtid_from_thread(team->t.t_threads[f]),
+ team->t.t_id, f, th->th.th_new_place,
+ th->th.th_first_place, th->th.th_last_place,
+ __kmp_affinity_num_masks));
}
- if (rem && (gap_ct == gap)) {
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
+ } else {
+ /* Having uniform space of available computation places I can create
+ T partitions of round(P/T) size and put threads into the first
+ place of each partition. */
+ double current = static_cast<double>(masters_place);
+ double spacing =
+ (static_cast<double>(n_places + 1) / static_cast<double>(n_th));
+ int first, last;
+ kmp_info_t *th;
+
+ thidx = n_th + 1;
+ if (update_master_only == 1)
+ thidx = 1;
+ for (f = 0; f < thidx; f++) {
+ first = static_cast<int>(current);
+ last = static_cast<int>(current + spacing) - 1;
+ KMP_DEBUG_ASSERT(last >= first);
+ if (first >= n_places) {
+ if (masters_place) {
+ first -= n_places;
+ last -= n_places;
+ if (first == (masters_place + 1)) {
+ KMP_DEBUG_ASSERT(f == n_th);
+ first--;
+ }
+ if (last == masters_place) {
+ KMP_DEBUG_ASSERT(f == (n_th - 1));
+ last--;
+ }
+ } else {
+ KMP_DEBUG_ASSERT(f == n_th);
+ first = 0;
+ last = 0;
+ }
+ }
+ if (last >= n_places) {
+ last = (n_places - 1);
+ }
+ place = first;
+ current += spacing;
+ if (f < n_th) {
+ KMP_DEBUG_ASSERT(0 <= first);
+ KMP_DEBUG_ASSERT(n_places > first);
+ KMP_DEBUG_ASSERT(0 <= last);
+ KMP_DEBUG_ASSERT(n_places > last);
+ KMP_DEBUG_ASSERT(last_place >= first_place);
+ th = team->t.t_threads[f];
+ KMP_DEBUG_ASSERT(th);
+ th->th.th_first_place = first;
+ th->th.th_new_place = place;
+ th->th.th_last_place = last;
+
+ KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
+ "partition = [%d,%d], spacing = %.4f\n",
+ __kmp_gtid_from_thread(team->t.t_threads[f]),
+ team->t.t_id, f, th->th.th_new_place,
+ th->th.th_first_place, th->th.th_last_place,
+ spacing));
}
- rem--;
- gap_ct = 0;
- }
- th->th.th_last_place = place;
- gap_ct++;
-
- if (place == last_place) {
- place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
- place = 0;
- } else {
- place++;
}
-
- KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d]\n",
- __kmp_gtid_from_thread(team->t.t_threads[f]),
- team->t.t_id, f, th->th.th_new_place,
- th->th.th_first_place, th->th.th_last_place));
}
KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
} else {
OpenPOWER on IntegriCloud