Diffstat (limited to 'openmp/runtime/src/kmp_affinity.cpp')
-rw-r--r--  openmp/runtime/src/kmp_affinity.cpp | 405
 1 file changed, 268 insertions(+), 137 deletions(-)
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 082ec9d6c73..d6821e0440f 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1,7 +1,7 @@
/*
* kmp_affinity.cpp -- affinity management
- * $Revision: 42810 $
- * $Date: 2013-11-07 12:06:33 -0600 (Thu, 07 Nov 2013) $
+ * $Revision: 43473 $
+ * $Date: 2014-09-26 15:02:57 -0500 (Fri, 26 Sep 2014) $
*/
@@ -19,7 +19,7 @@
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_str.h"
-
+#include "kmp_wrapper_getpid.h"
#if KMP_AFFINITY_SUPPORTED
@@ -49,7 +49,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
return buf;
}
- sprintf(scan, "{%ld", i);
+ sprintf(scan, "{%ld", (long)i);
while (*scan != '\0') scan++;
i++;
for (; i < KMP_CPU_SETSIZE; i++) {
@@ -66,7 +66,7 @@ __kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
if (end - scan < 15) {
break;
}
- sprintf(scan, ",%-ld", i);
+ sprintf(scan, ",%-ld", (long)i);
while (*scan != '\0') scan++;
}
if (i < KMP_CPU_SETSIZE) {
@@ -89,7 +89,6 @@ __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
if (__kmp_num_proc_groups > 1) {
int group;
- struct GROUP_AFFINITY ga;
KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
for (group = 0; group < __kmp_num_proc_groups; group++) {
int i;
@@ -315,6 +314,106 @@ __kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
return 0;
}
+/** A structure for holding machine-specific hierarchy info to be computed once at init. */
+class hierarchy_info {
+public:
+ /** Typical levels are threads/core, cores/package or socket, packages/node, nodes/machine,
+ etc. We don't want to get specific with nomenclature */
+ static const kmp_uint32 maxLevels=7;
+
+ /** This is specifically the depth of the machine configuration hierarchy, in terms of the
+ number of levels along the longest path from root to any leaf. It corresponds to the
+ number of entries in numPerLevel if we exclude all but one trailing 1. */
+ kmp_uint32 depth;
+ kmp_uint32 base_depth;
+ kmp_uint32 base_num_threads;
+ bool uninitialized;
+
+ /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a
+ node at level i has. For example, if we have a machine with 4 packages, 4 cores/package
+ and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */
+ kmp_uint32 numPerLevel[maxLevels];
+ kmp_uint32 skipPerLevel[maxLevels];
+
+ void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
+ int hier_depth = adr2os[0].first.depth;
+ int level = 0;
+ for (int i=hier_depth-1; i>=0; --i) {
+ int max = -1;
+ for (int j=0; j<num_addrs; ++j) {
+ int next = adr2os[j].first.childNums[i];
+ if (next > max) max = next;
+ }
+ numPerLevel[level] = max+1;
+ ++level;
+ }
+ }
+
+ hierarchy_info() : depth(1), uninitialized(true) {}
+ void init(AddrUnsPair *adr2os, int num_addrs)
+ {
+ uninitialized = false;
+ for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ skipPerLevel[i] = 1;
+ }
+
+ // Sort table by physical ID
+ if (adr2os) {
+ qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
+ deriveLevels(adr2os, num_addrs);
+ }
+ else {
+ numPerLevel[0] = 4;
+ numPerLevel[1] = num_addrs/4;
+ if (num_addrs%4) numPerLevel[1]++;
+ }
+
+ base_num_threads = num_addrs;
+ for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
+ if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
+ depth++;
+
+ kmp_uint32 branch = 4;
+ if (numPerLevel[0] == 1) branch = num_addrs/4;
+ if (branch<4) branch=4;
+ for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
+ while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
+ if (numPerLevel[d] & 1) numPerLevel[d]++;
+ numPerLevel[d] = numPerLevel[d] >> 1;
+ if (numPerLevel[d+1] == 1) depth++;
+ numPerLevel[d+1] = numPerLevel[d+1] << 1;
+ }
+ if(numPerLevel[0] == 1) {
+ branch = branch >> 1;
+ if (branch<4) branch = 4;
+ }
+ }
+
+ for (kmp_uint32 i=1; i<depth; ++i)
+ skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
+
+ base_depth = depth;
+ }
+};
+
+static hierarchy_info machine_hierarchy;
+
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
+ if (machine_hierarchy.uninitialized)
+ machine_hierarchy.init(NULL, nproc);
+
+ if (nproc <= machine_hierarchy.base_num_threads)
+ machine_hierarchy.depth = machine_hierarchy.base_depth;
+ KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
+ while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1]) {
+ machine_hierarchy.depth++;
+ machine_hierarchy.skipPerLevel[machine_hierarchy.depth-1] = 2*machine_hierarchy.skipPerLevel[machine_hierarchy.depth-2];
+ }
+ thr_bar->depth = machine_hierarchy.depth;
+ thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
+ thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
+}
//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
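
The new hierarchy_info class above encodes the machine topology as per-level fan-outs (numPerLevel, level 0 being the leaves) and cumulative leaf counts (skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1]). The following is a minimal standalone sketch, not part of the patch, showing that relationship for the 4-package / 4-core / 2-HT-per-core machine cited in the class comment:

    // Standalone sketch, not the runtime code: derives skipPerLevel from
    // numPerLevel the same way hierarchy_info::init() does, using the
    // 4-package / 4-core / 2-HT example from the class comment above.
    #include <cstdio>

    int main() {
        const unsigned maxLevels = 7;                              // mirrors hierarchy_info::maxLevels
        unsigned numPerLevel[maxLevels]  = {2, 4, 4, 1, 1, 1, 1};  // HT/core, cores/pkg, pkgs/machine, ...
        unsigned skipPerLevel[maxLevels] = {1, 1, 1, 1, 1, 1, 1};

        // skipPerLevel[i] becomes the number of hardware threads spanned by one
        // subtree rooted at level i: 1, 2, 8, 32, 32, 32, 32 for this machine.
        for (unsigned i = 1; i < maxLevels; ++i)
            skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];

        for (unsigned i = 0; i < maxLevels; ++i)
            printf("level %u: numPerLevel=%u  skipPerLevel=%u\n",
                   i, numPerLevel[i], skipPerLevel[i]);
        return 0;
    }

__kmp_get_hierarchy then reuses these tables: when nproc exceeds the number of leaves the machine actually has, it appends levels whose skip value doubles the previous one until the tree is wide enough.
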
@@ -1963,7 +2062,7 @@ __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
// A newline has signalled the end of the processor record.
// Check that there aren't too many procs specified.
//
- if (num_avail == __kmp_xproc) {
+ if ((int)num_avail == __kmp_xproc) {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_TooManyEntries;
return -1;
@@ -2587,7 +2686,7 @@ static int nextNewMask;
#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
{ \
if (((_osId) > _maxOsId) || \
- (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX(_osId2Mask, (_osId))))) {\
+ (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
if (__kmp_affinity_verbose || (__kmp_affinity_warnings \
&& (__kmp_affinity_type != affinity_none))) { \
KMP_WARNING(AffIgnoreInvalidProcID, _osId); \
@@ -3045,14 +3144,15 @@ __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
(*setSize)++;
}
*scan = next; // skip num
- }
+ }
else {
KMP_ASSERT2(0, "bad explicit places list");
}
}
-static void
+//static void
+void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
unsigned int *out_numMasks, const char *placelist,
kmp_affin_mask_t *osId2Mask, int maxOsId)
@@ -3109,71 +3209,41 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
// valid follow sets are ',' ':' and EOL
//
SKIP_WS(scan);
+ int stride;
if (*scan == '\0' || *scan == ',') {
- int i;
- for (i = 0; i < count; i++) {
- int j;
- if (setSize == 0) {
- break;
- }
- ADD_MASK(tempMask);
- setSize = 0;
- for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j > 0; j--) {
- //
- // Use a temp var in case macro is changed to evaluate
- // args multiple times.
- //
- if (KMP_CPU_ISSET(j - 1, tempMask)) {
- KMP_CPU_SET(j, tempMask);
- setSize++;
- }
- else {
- KMP_CPU_CLR(j, tempMask);
- }
+ stride = +1;
+ }
+ else {
+ KMP_ASSERT2(*scan == ':', "bad explicit places list");
+ scan++; // skip ':'
+
+ //
+ // Read stride parameter
+ //
+ int sign = +1;
+ for (;;) {
+ SKIP_WS(scan);
+ if (*scan == '+') {
+ scan++; // skip '+'
+ continue;
}
- for (; j >= 0; j--) {
- KMP_CPU_CLR(j, tempMask);
+ if (*scan == '-') {
+ sign *= -1;
+ scan++; // skip '-'
+ continue;
}
- }
- KMP_CPU_ZERO(tempMask);
- setSize = 0;
-
- if (*scan == '\0') {
break;
}
- scan++; // skip ','
- continue;
- }
-
- KMP_ASSERT2(*scan == ':', "bad explicit places list");
- scan++; // skip ':'
-
- //
- // Read stride parameter
- //
- int sign = +1;
- for (;;) {
SKIP_WS(scan);
- if (*scan == '+') {
- scan++; // skip '+'
- continue;
- }
- if (*scan == '-') {
- sign *= -1;
- scan++; // skip '-'
- continue;
- }
- break;
+ KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
+ "bad explicit places list");
+ next = scan;
+ SKIP_DIGITS(next);
+ stride = __kmp_str_to_int(scan, *next);
+ KMP_DEBUG_ASSERT(stride >= 0);
+ scan = next;
+ stride *= sign;
}
- SKIP_WS(scan);
- KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
- "bad explicit places list");
- next = scan;
- SKIP_DIGITS(next);
- int stride = __kmp_str_to_int(scan, *next);
- KMP_DEBUG_ASSERT(stride >= 0);
- scan = next;
- stride *= sign;
if (stride > 0) {
int i;
@@ -3185,13 +3255,21 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
ADD_MASK(tempMask);
setSize = 0;
for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
- if (KMP_CPU_ISSET(j - stride, tempMask)) {
- KMP_CPU_SET(j, tempMask);
- setSize++;
+ if (! KMP_CPU_ISSET(j - stride, tempMask)) {
+ KMP_CPU_CLR(j, tempMask);
}
- else {
+ else if ((j > maxOsId) ||
+ (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, j);
+ }
KMP_CPU_CLR(j, tempMask);
}
+ else {
+ KMP_CPU_SET(j, tempMask);
+ setSize++;
+ }
}
for (; j >= 0; j--) {
KMP_CPU_CLR(j, tempMask);
@@ -3201,23 +3279,31 @@ __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
else {
int i;
for (i = 0; i < count; i++) {
- unsigned j;
+ int j;
if (setSize == 0) {
break;
}
ADD_MASK(tempMask);
setSize = 0;
- for (j = 0; j < (__kmp_affin_mask_size * CHAR_BIT) + stride;
+ for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
j++) {
- if (KMP_CPU_ISSET(j - stride, tempMask)) {
- KMP_CPU_SET(j, tempMask);
- setSize++;
+ if (! KMP_CPU_ISSET(j - stride, tempMask)) {
+ KMP_CPU_CLR(j, tempMask);
}
- else {
+ else if ((j > maxOsId) ||
+ (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
+ if (__kmp_affinity_verbose || (__kmp_affinity_warnings
+ && (__kmp_affinity_type != affinity_none))) {
+ KMP_WARNING(AffIgnoreInvalidProcID, j);
+ }
KMP_CPU_CLR(j, tempMask);
}
+ else {
+ KMP_CPU_SET(j, tempMask);
+ setSize++;
+ }
}
- for (; j < __kmp_affin_mask_size * CHAR_BIT; j++) {
+ for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
KMP_CPU_CLR(j, tempMask);
}
}
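
For reference, the reworked positive-stride branch above repeats `count` times: record the current place, then shift every set bit up by `stride`, dropping any target bit that is not a valid OS proc id (the new osId2Mask/maxOsId check). A small standalone sketch of that loop follows, assuming std::bitset in place of kmp_affin_mask_t and a `valid` mask in place of the osId2Mask lookup:

    // Standalone sketch, not the runtime code: std::bitset stands in for
    // kmp_affin_mask_t and 'valid' stands in for the osId2Mask/maxOsId check.
    // Models a place-list entry like "{0,1}:3:4" -> places {0,1}, {4,5}, {8,9}.
    #include <bitset>
    #include <cstdio>

    int main() {
        const int NBITS = 16;
        std::bitset<NBITS> place;                     // current place, initially {0,1}
        place.set(0); place.set(1);
        std::bitset<NBITS> valid;                     // OS procs that actually exist
        for (int p = 0; p < 12; ++p) valid.set(p);

        const int count = 3, stride = 4;
        for (int c = 0; c < count; ++c) {
            if (place.none()) break;                  // mirrors the setSize == 0 check
            printf("place %d: %s\n", c,               // ADD_MASK(tempMask) in the runtime
                   place.to_string().c_str());
            // Shift set bits up by 'stride', high to low so bits are not reused.
            for (int j = NBITS - 1; j >= stride; --j) {
                if (!place.test(j - stride))
                    place.reset(j);
                else if (!valid.test(j))
                    place.reset(j);                   // the runtime warns AffIgnoreInvalidProcID here
                else
                    place.set(j);
            }
            for (int j = stride - 1; j >= 0; --j)     // clear the bits below the stride
                place.reset(j);
        }
        return 0;
    }

The negative-stride branch is symmetric, scanning from low to high bits instead.
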
@@ -3270,9 +3356,13 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
}
__kmp_place_num_cores = nCoresPerPkg; // use all available cores
}
- if ( !__kmp_affinity_uniform_topology() || depth != 3 ) {
- KMP_WARNING( AffThrPlaceUnsupported );
- return; // don't support non-uniform topology or not-3-level architecture
+ if ( !__kmp_affinity_uniform_topology() ) {
+ KMP_WARNING( AffThrPlaceNonUniform );
+ return; // don't support non-uniform topology
+ }
+ if ( depth != 3 ) {
+ KMP_WARNING( AffThrPlaceNonThreeLevel );
+ return; // don't support not-3-level topology
}
if ( __kmp_place_num_threads_per_core == 0 ) {
__kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
@@ -3400,18 +3490,14 @@ __kmp_aux_affinity_initialize(void)
}
if (depth < 0) {
- if ((msg_id != kmp_i18n_null)
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
-# if KMP_MIC
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
KMP_I18N_STR(DecodingLegacyAPIC));
}
-# else
- KMP_WARNING(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
- KMP_I18N_STR(DecodingLegacyAPIC));
-# endif
+ else {
+ KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
+ }
}
file_name = NULL;
@@ -3428,19 +3514,13 @@ __kmp_aux_affinity_initialize(void)
# if KMP_OS_LINUX
if (depth < 0) {
- if ((msg_id != kmp_i18n_null)
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
-# if KMP_MIC
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity_verbose) {
+ if (msg_id != kmp_i18n_null) {
KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
}
-# else
- KMP_WARNING(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
-# endif
- }
- else if (__kmp_affinity_verbose) {
- KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
+ else {
+ KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
+ }
}
FILE *f = fopen("/proc/cpuinfo", "r");
@@ -3461,20 +3541,32 @@ __kmp_aux_affinity_initialize(void)
# endif /* KMP_OS_LINUX */
+# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
+
+ if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
+ if (__kmp_affinity_verbose) {
+ KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
+ }
+
+ depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
+ KMP_ASSERT(depth != 0);
+ }
+
+# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
+
if (depth < 0) {
- if (msg_id != kmp_i18n_null
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
+ if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
if (file_name == NULL) {
- KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
+ KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
}
else if (line == 0) {
- KMP_WARNING(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
+ KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
}
else {
- KMP_WARNING(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
+ KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
}
}
+ // FIXME - print msg if msg_id = kmp_i18n_null ???
file_name = "";
depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
@@ -3508,7 +3600,6 @@ __kmp_aux_affinity_initialize(void)
KMP_ASSERT(address2os == NULL);
return;
}
-
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
@@ -3526,7 +3617,6 @@ __kmp_aux_affinity_initialize(void)
KMP_ASSERT(address2os == NULL);
return;
}
-
if (depth < 0) {
KMP_ASSERT(msg_id != kmp_i18n_null);
KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
@@ -3597,23 +3687,9 @@ __kmp_aux_affinity_initialize(void)
depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
KMP_ASSERT(depth != 0);
-
if (depth < 0) {
- if ((msg_id != kmp_i18n_null)
- && (__kmp_affinity_verbose || (__kmp_affinity_warnings
- && (__kmp_affinity_type != affinity_none)))) {
- KMP_WARNING(UsingFlatOS, __kmp_i18n_catgets(msg_id));
- }
-
- depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
- if (depth == 0) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- KMP_ASSERT(address2os == NULL);
- return;
- }
- // should not fail
- KMP_ASSERT(depth > 0);
- KMP_ASSERT(address2os != NULL);
+ KMP_ASSERT(msg_id != kmp_i18n_null);
+ KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
}
}
@@ -3658,7 +3734,7 @@ __kmp_aux_affinity_initialize(void)
kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
address2os, __kmp_avail_proc);
if (__kmp_affinity_gran_levels == 0) {
- KMP_DEBUG_ASSERT(numUnique == __kmp_avail_proc);
+ KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
//
@@ -3852,6 +3928,7 @@ __kmp_aux_affinity_initialize(void)
}
__kmp_free(osId2Mask);
+ machine_hierarchy.init(address2os, __kmp_avail_proc);
}
@@ -3953,7 +4030,7 @@ __kmp_affinity_set_init_mask(int gtid, int isa_root)
}
# endif
KMP_ASSERT(fullMask != NULL);
- i = -1;
+ i = KMP_PLACE_ALL;
mask = fullMask;
}
else {
@@ -4020,7 +4097,8 @@ __kmp_affinity_set_init_mask(int gtid, int isa_root)
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", gtid, buf);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
+ buf);
}
# if KMP_OS_WINDOWS
@@ -4058,14 +4136,14 @@ __kmp_affinity_set_place(int gtid)
// Check that the new place is within this thread's partition.
//
KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
- KMP_DEBUG_ASSERT(th->th.th_new_place >= 0);
- KMP_DEBUG_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
+ KMP_ASSERT(th->th.th_new_place >= 0);
+ KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
if (th->th.th_first_place <= th->th.th_last_place) {
- KMP_DEBUG_ASSERT((th->th.th_new_place >= th->th.th_first_place)
+ KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
&& (th->th.th_new_place <= th->th.th_last_place));
}
else {
- KMP_DEBUG_ASSERT((th->th.th_new_place <= th->th.th_first_place)
+ KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
|| (th->th.th_new_place >= th->th.th_last_place));
}
@@ -4082,7 +4160,8 @@ __kmp_affinity_set_place(int gtid)
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", gtid, buf);
+ KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
+ gtid, buf);
}
__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
@@ -4153,6 +4232,11 @@ __kmp_aux_set_affinity(void **mask)
th->th.th_new_place = KMP_PLACE_UNDEFINED;
th->th.th_first_place = 0;
th->th.th_last_place = __kmp_affinity_num_masks - 1;
+
+ //
+ // Turn off 4.0 affinity for the current thread at this parallel level.
+ //
+ th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
# endif
return retval;
@@ -4207,7 +4291,6 @@ __kmp_aux_get_affinity(void **mask)
}
-
int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
@@ -4360,7 +4443,8 @@ void __kmp_balanced_affinity( int tid, int nthreads )
if (__kmp_affinity_verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ tid, buf);
}
__kmp_set_system_affinity( mask, TRUE );
} else { // Non-uniform topology
@@ -4535,7 +4619,8 @@ void __kmp_balanced_affinity( int tid, int nthreads )
if (__kmp_affinity_verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", tid, buf);
+ KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
+ tid, buf);
}
__kmp_set_system_affinity( mask, TRUE );
}
@@ -4543,4 +4628,50 @@ void __kmp_balanced_affinity( int tid, int nthreads )
# endif /* KMP_MIC */
+#else
+ // affinity not supported
+
+kmp_uint32 mac_skipPerLevel[7];
+kmp_uint32 mac_depth;
+kmp_uint8 mac_leaf_kids;
+void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
+ static int first = 1;
+ if (first) {
+ const kmp_uint32 maxLevels = 7;
+ kmp_uint32 numPerLevel[maxLevels];
+
+ for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
+ numPerLevel[i] = 1;
+ mac_skipPerLevel[i] = 1;
+ }
+
+ mac_depth = 2;
+ numPerLevel[0] = nproc;
+
+ kmp_uint32 branch = 4;
+ if (numPerLevel[0] == 1) branch = nproc/4;
+ if (branch<4) branch=4;
+ for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
+ while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
+ if (numPerLevel[d] & 1) numPerLevel[d]++;
+ numPerLevel[d] = numPerLevel[d] >> 1;
+ if (numPerLevel[d+1] == 1) mac_depth++;
+ numPerLevel[d+1] = numPerLevel[d+1] << 1;
+ }
+ if(numPerLevel[0] == 1) {
+ branch = branch >> 1;
+ if (branch<4) branch = 4;
+ }
+ }
+
+ for (kmp_uint32 i=1; i<mac_depth; ++i)
+ mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
+ mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
+ first=0;
+ }
+ thr_bar->depth = mac_depth;
+ thr_bar->base_leaf_kids = mac_leaf_kids;
+ thr_bar->skip_per_level = mac_skipPerLevel;
+}
+
#endif // KMP_AFFINITY_SUPPORTED